Repository: OpenFreeEnergy/openfe
Branch: main
Commit: cb10892e79a2
Files: 465
Total size: 4.5 MB
Directory structure:
gitextract_wx9d1k3u/
├── .dockerignore
├── .git-blame-ignore-revs
├── .gitattributes
├── .github/
│ ├── CONTRIBUTING.md
│ ├── PULL_REQUEST_TEMPLATE/
│ │ └── release_template.md
│ ├── pull_request_template.md
│ └── workflows/
│ ├── aws-cpu-long-tests.yaml
│ ├── aws-gpu-integration-tests.yaml
│ ├── ci.yaml
│ ├── clean-pr-caches.yaml
│ ├── cron-conda.yaml
│ ├── cron-docker.yaml
│ ├── cron-feedstock-build-tests.yaml
│ ├── cron-package-test.yaml
│ ├── griffe-api-break.yaml
│ ├── mypy.yaml
│ ├── release-docker-image.yaml
│ ├── release-installers.yaml
│ ├── release-make-condalock.yaml
│ ├── release-prep-examplenotebooks.yaml
│ └── release-prep-feedstock.yaml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CITATION.cff
├── Code_of_Conduct.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── codecov.yml
├── devtools/
│ ├── data/
│ │ ├── fix_rbfe_results.py
│ │ └── gen_serialized_results.py
│ ├── debug_openmm.sh
│ └── installer/
│ └── construct.yaml
├── docs/
│ ├── CHANGELOG.rst
│ ├── Makefile
│ ├── _ext/
│ │ └── sass.py
│ ├── _sass/
│ │ └── deflist-flowchart.scss
│ ├── _templates/
│ │ └── autosummary/
│ │ ├── base.rst
│ │ └── class.rst
│ ├── conf.py
│ ├── cookbook/
│ │ ├── bespoke_parameters.nblink
│ │ ├── choose_protocol.nblink
│ │ ├── create_alchemical_network.nblink
│ │ ├── dumping_transformation.rst
│ │ ├── generate_ligand_network.nblink
│ │ ├── hand_write_ligand_network.nblink
│ │ ├── index.rst
│ │ ├── jq_inspection.rst
│ │ ├── ligandnetwork_vis.nblink
│ │ ├── loading_molecules.nblink
│ │ ├── network_from_orion_fepp.nblink
│ │ ├── rfe_alchemical_planners.nblink
│ │ └── user_charges.nblink
│ ├── environment.yaml
│ ├── guide/
│ │ ├── cli/
│ │ │ ├── cli_basics.rst
│ │ │ ├── cli_yaml.rst
│ │ │ └── index.rst
│ │ ├── execution/
│ │ │ ├── execution_theory.rst
│ │ │ ├── index.rst
│ │ │ └── quickrun_execution.rst
│ │ ├── index.rst
│ │ ├── introduction.rst
│ │ ├── protocols/
│ │ │ ├── absolutebinding.rst
│ │ │ ├── absolutesolvation.rst
│ │ │ ├── index.rst
│ │ │ ├── plainmd.rst
│ │ │ ├── relativehybridtopology.rst
│ │ │ └── septop.rst
│ │ ├── results/
│ │ │ ├── index.rst
│ │ │ ├── working_with_networks.rst
│ │ │ └── working_with_results.rst
│ │ ├── setup/
│ │ │ ├── alchemical_network_model.rst
│ │ │ ├── chemical_systems_and_thermodynamic_cycles.rst
│ │ │ ├── creating_atom_mappings_and_scores.rst
│ │ │ ├── creating_ligand_networks.rst
│ │ │ ├── defining_protocols.rst
│ │ │ └── index.rst
│ │ ├── troubleshooting.rst
│ │ └── under_the_hood.rst
│ ├── index.rst
│ ├── installation.rst
│ ├── make.bat
│ ├── reference/
│ │ ├── api/
│ │ │ ├── alchemical_network_planning.rst
│ │ │ ├── atom_mappers.rst
│ │ │ ├── defining_and_executing_simulations.rst
│ │ │ ├── index.rst
│ │ │ ├── ligand_network.rst
│ │ │ ├── openmm_binding_afe.rst
│ │ │ ├── openmm_md.rst
│ │ │ ├── openmm_protocol_settings.rst
│ │ │ ├── openmm_rfe.rst
│ │ │ ├── openmm_septop.rst
│ │ │ ├── openmm_solvation_afe.rst
│ │ │ └── systems_and_components.rst
│ │ ├── cli/
│ │ │ ├── charge_molecules.rst
│ │ │ ├── gather.rst
│ │ │ ├── index.rst
│ │ │ ├── plan_rbfe_network.rst
│ │ │ ├── plan_rhfe_network.rst
│ │ │ └── quickrun.rst
│ │ └── index.rst
│ └── tutorials/
│ ├── .gitignore
│ ├── abfe_analysis_tutorial.nblink
│ ├── abfe_tutorial.nblink
│ ├── ahfe_tutorial.nblink
│ ├── charge_molecules_cli_tutorial.rst
│ ├── index.rst
│ ├── md_tutorial.nblink
│ ├── plotting_with_cinnabar.nblink
│ ├── rbfe_cli_tutorial.rst
│ ├── rbfe_membrane_protein.nblink
│ ├── rbfe_python_tutorial.nblink
│ ├── septop_analysis_tutorial.nblink
│ ├── septop_tutorial.nblink
│ └── showcase_notebook.nblink
├── environment.yml
├── news/
│ └── TEMPLATE.rst
├── production/
│ ├── Dockerfile
│ └── environment.yml
├── pyproject.toml
├── rever.xsh
└── src/
├── openfe/
│ ├── __init__.py
│ ├── analysis/
│ │ ├── __init__.py
│ │ └── plotting.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── _downloader.py
│ │ └── _registry.py
│ ├── due.py
│ ├── orchestration/
│ │ └── __init__.py
│ ├── protocols/
│ │ ├── __init__.py
│ │ ├── openmm_afe/
│ │ │ ├── __init__.py
│ │ │ ├── abfe_units.py
│ │ │ ├── afe_protocol_results.py
│ │ │ ├── ahfe_units.py
│ │ │ ├── base_afe_units.py
│ │ │ ├── equil_afe_settings.py
│ │ │ ├── equil_binding_afe_method.py
│ │ │ └── equil_solvation_afe_method.py
│ │ ├── openmm_md/
│ │ │ ├── __init__.py
│ │ │ ├── plain_md_methods.py
│ │ │ └── plain_md_settings.py
│ │ ├── openmm_rfe/
│ │ │ ├── __init__.py
│ │ │ ├── _rfe_utils/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── lambdaprotocol.py
│ │ │ │ ├── multistate.py
│ │ │ │ ├── relative.py
│ │ │ │ └── topologyhelpers.py
│ │ │ ├── equil_rfe_methods.py
│ │ │ ├── equil_rfe_settings.py
│ │ │ ├── hybridtop_protocol_results.py
│ │ │ ├── hybridtop_protocols.py
│ │ │ └── hybridtop_units.py
│ │ ├── openmm_septop/
│ │ │ ├── __init__.py
│ │ │ ├── base_units.py
│ │ │ ├── equil_septop_method.py
│ │ │ ├── equil_septop_settings.py
│ │ │ ├── septop_protocol_results.py
│ │ │ ├── septop_units.py
│ │ │ └── utils.py
│ │ ├── openmm_utils/
│ │ │ ├── __init__.py
│ │ │ ├── charge_generation.py
│ │ │ ├── mdtraj_utils.py
│ │ │ ├── multistate_analysis.py
│ │ │ ├── omm_compute.py
│ │ │ ├── omm_settings.py
│ │ │ ├── serialization.py
│ │ │ ├── settings_validation.py
│ │ │ ├── system_creation.py
│ │ │ └── system_validation.py
│ │ └── restraint_utils/
│ │ ├── __init__.py
│ │ ├── geometry/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── boresch/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── geometry.py
│ │ │ │ ├── guest.py
│ │ │ │ └── host.py
│ │ │ ├── flatbottom.py
│ │ │ ├── harmonic.py
│ │ │ └── utils.py
│ │ ├── openmm/
│ │ │ ├── __init__.py
│ │ │ ├── omm_forces.py
│ │ │ └── omm_restraints.py
│ │ └── settings.py
│ ├── setup/
│ │ ├── __init__.py
│ │ ├── alchemical_network_planner/
│ │ │ ├── __init__.py
│ │ │ ├── abstract_alchemical_network_planner.py
│ │ │ └── relative_alchemical_network_planner.py
│ │ ├── atom_mapping/
│ │ │ ├── __init__.py
│ │ │ ├── ligandatommapper.py
│ │ │ ├── lomap_mapper.py
│ │ │ ├── lomap_scorers.py
│ │ │ ├── perses_mapper.py
│ │ │ └── perses_scorers.py
│ │ ├── chemicalsystem_generator/
│ │ │ ├── __init__.py
│ │ │ ├── abstract_chemicalsystem_generator.py
│ │ │ └── easy_chemicalsystem_generator.py
│ │ └── ligand_network_planning.py
│ ├── storage/
│ │ ├── __init__.py
│ │ ├── metadatastore.py
│ │ ├── resultclient.py
│ │ └── resultserver.py
│ ├── tests/
│ │ ├── __init__.py
│ │ ├── analysis/
│ │ │ ├── __init__.py
│ │ │ └── test_plotting.py
│ │ ├── conftest.py
│ │ ├── data/
│ │ │ ├── 181l_only.pdb
│ │ │ ├── CN.sdf
│ │ │ ├── __init__.py
│ │ │ ├── a2a/
│ │ │ │ └── __init__.py
│ │ │ ├── benzene_modifications.sdf
│ │ │ ├── cdk8/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── cdk8_ligands.sdf
│ │ │ │ └── cdk8_protein.pdb
│ │ │ ├── eg5/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── eg5_cofactor.sdf
│ │ │ │ ├── eg5_ligands.sdf
│ │ │ │ └── eg5_protein.pdb
│ │ │ ├── external_formats/
│ │ │ │ ├── __init__.py
│ │ │ │ └── somebenzenes_edges.edge
│ │ │ ├── htf/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── chloroethane.sdf
│ │ │ │ ├── ethane.sdf
│ │ │ │ ├── fluoroethane.sdf
│ │ │ │ └── t4_lysozyme_data/
│ │ │ │ ├── benzene.sdf
│ │ │ │ ├── chlorobenzene.sdf
│ │ │ │ └── fluorobenzene.sdf
│ │ │ ├── lomap_basic/
│ │ │ │ ├── 1,3,7-trimethylnaphthalene.mol2
│ │ │ │ ├── 1-butyl-4-methylbenzene.mol2
│ │ │ │ ├── 2,6-dimethylnaphthalene.mol2
│ │ │ │ ├── 2-methyl-6-propylnaphthalene.mol2
│ │ │ │ ├── 2-methylnaphthalene.mol2
│ │ │ │ ├── 2-naftanol.mol2
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── methylcyclohexane.mol2
│ │ │ │ └── toluene.mol2
│ │ │ ├── multi_molecule.sdf
│ │ │ ├── openmm_afe/
│ │ │ │ ├── T4_abfe_system.xml.bz2
│ │ │ │ └── __init__.py
│ │ │ ├── openmm_md/
│ │ │ │ └── __init__.py
│ │ │ ├── openmm_rfe/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── benzene_toluene_hybrid_top/
│ │ │ │ │ ├── hybrid_topology_atoms.csv
│ │ │ │ │ └── hybrid_topology_bonds.txt
│ │ │ │ ├── charged_benzenes.sdf
│ │ │ │ ├── dummy_charge_ligand_23.sdf
│ │ │ │ ├── dummy_charge_ligand_55.sdf
│ │ │ │ ├── ligand_23.sdf
│ │ │ │ ├── ligand_55.sdf
│ │ │ │ ├── malt1_shapefit_1832577-09-9.sdf
│ │ │ │ ├── malt1_shapefit_Pfizer-01-01.sdf
│ │ │ │ ├── reference.xml
│ │ │ │ ├── vacuum_nocoord.nc
│ │ │ │ └── vacuum_nocoord_checkpoint.nc
│ │ │ ├── openmm_septop/
│ │ │ │ ├── __init__.py
│ │ │ │ └── system.xml.bz2
│ │ │ └── serialization/
│ │ │ ├── __init__.py
│ │ │ ├── ethane_template.sdf
│ │ │ └── network_template.graphml
│ │ ├── dev/
│ │ │ ├── __init__.py
│ │ │ └── serialization_test_templates.py
│ │ ├── protocols/
│ │ │ ├── __init__.py
│ │ │ ├── conftest.py
│ │ │ ├── openmm_abfe/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── conftest.py
│ │ │ │ ├── test_abfe_energies.py
│ │ │ │ ├── test_abfe_protocol.py
│ │ │ │ ├── test_abfe_protocol_results.py
│ │ │ │ ├── test_abfe_settings.py
│ │ │ │ ├── test_abfe_slow.py
│ │ │ │ ├── test_abfe_tokenization.py
│ │ │ │ ├── test_abfe_validation.py
│ │ │ │ └── utils.py
│ │ │ ├── openmm_ahfe/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_ahfe_protocol.py
│ │ │ │ ├── test_ahfe_protocol_results.py
│ │ │ │ ├── test_ahfe_resume.py
│ │ │ │ ├── test_ahfe_settings.py
│ │ │ │ ├── test_ahfe_slow.py
│ │ │ │ ├── test_ahfe_tokenization.py
│ │ │ │ ├── test_ahfe_validation.py
│ │ │ │ └── utils.py
│ │ │ ├── openmm_md/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_plain_md_protocol.py
│ │ │ │ ├── test_plain_md_resume.py
│ │ │ │ ├── test_plain_md_slow.py
│ │ │ │ └── test_plain_md_tokenization.py
│ │ │ ├── openmm_rfe/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── helpers.py
│ │ │ │ ├── test_hybrid_factory.py
│ │ │ │ ├── test_hybrid_top_protocol.py
│ │ │ │ ├── test_hybrid_top_resume.py
│ │ │ │ ├── test_hybrid_top_slow.py
│ │ │ │ ├── test_hybrid_top_tokenization.py
│ │ │ │ └── test_hybrid_top_validation.py
│ │ │ ├── openmm_septop/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── conftest.py
│ │ │ │ ├── test_septop_protocol.py
│ │ │ │ ├── test_septop_protocol_results.py
│ │ │ │ ├── test_septop_resume.py
│ │ │ │ ├── test_septop_settings.py
│ │ │ │ ├── test_septop_slow.py
│ │ │ │ ├── test_septop_tokenization.py
│ │ │ │ ├── test_septop_validation.py
│ │ │ │ └── utils.py
│ │ │ ├── restraints/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_geometry_base.py
│ │ │ │ ├── test_geometry_boresch.py
│ │ │ │ ├── test_geometry_boresch_guest.py
│ │ │ │ ├── test_geometry_boresch_host.py
│ │ │ │ ├── test_geometry_flatbottom.py
│ │ │ │ ├── test_geometry_harmonic.py
│ │ │ │ ├── test_geometry_utils.py
│ │ │ │ ├── test_omm_restraints.py
│ │ │ │ ├── test_openmm_forces.py
│ │ │ │ └── test_settings.py
│ │ │ ├── test_openmm_settings.py
│ │ │ ├── test_openmmutils.py
│ │ │ └── test_openmmutils_serialization.py
│ │ ├── setup/
│ │ │ ├── __init__.py
│ │ │ ├── alchemical_network_planner/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── edge_types.py
│ │ │ │ └── test_relative_alchemical_network_planner.py
│ │ │ ├── atom_mapping/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── conftest.py
│ │ │ │ ├── test_atommapper.py
│ │ │ │ ├── test_lomap_atommapper.py
│ │ │ │ ├── test_lomap_scorers.py
│ │ │ │ ├── test_perses_atommapper.py
│ │ │ │ └── test_perses_scorers.py
│ │ │ ├── chemicalsystem_generator/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── component_checks.py
│ │ │ │ └── test_easy_chemicalsystem_generator.py
│ │ │ └── test_network_planning.py
│ │ ├── storage/
│ │ │ ├── __init__.py
│ │ │ ├── conftest.py
│ │ │ ├── test_metadatastore.py
│ │ │ ├── test_resultclient.py
│ │ │ └── test_resultserver.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_atommapping_network_plotting.py
│ │ ├── test_duecredit.py
│ │ ├── test_log_control.py
│ │ ├── test_network_plotting.py
│ │ ├── test_optional_imports.py
│ │ ├── test_remove_oechem.py
│ │ ├── test_system_probe.py
│ │ └── test_visualization_3D.py
│ └── utils/
│ ├── __init__.py
│ ├── atommapping_network_plotting.py
│ ├── custom_typing.py
│ ├── ligand_utils.py
│ ├── logging_control.py
│ ├── network_plotting.py
│ ├── optional_imports.py
│ ├── remove_oechem.py
│ ├── silence_root_logging.py
│ ├── system_probe.py
│ └── visualization_3D.py
└── openfecli/
├── README.md
├── __init__.py
├── cli.py
├── clicktypes/
│ ├── __init__.py
│ └── hyphenchoice.py
├── commands/
│ ├── __init__.py
│ ├── atommapping.py
│ ├── fetch.py
│ ├── gather.py
│ ├── gather_abfe.py
│ ├── gather_septop.py
│ ├── generate_partial_charges.py
│ ├── plan_rbfe_network.py
│ ├── plan_rhfe_network.py
│ ├── quickrun.py
│ ├── test.py
│ └── view_ligand_network.py
├── data/
│ ├── __init__.py
│ └── _registry.py
├── fetchables.py
├── fetching.py
├── parameters/
│ ├── __init__.py
│ ├── mapper.py
│ ├── misc.py
│ ├── mol.py
│ ├── molecules.py
│ ├── output.py
│ ├── output_dir.py
│ ├── plan_network_options.py
│ ├── protein.py
│ └── utils.py
├── plan_alchemical_networks_utils.py
├── plugins.py
├── tests/
│ ├── __init__.py
│ ├── clicktypes/
│ │ └── test_hyphenchoice.py
│ ├── commands/
│ │ ├── __init__.py
│ │ ├── conftest.py
│ │ ├── test_atommapping.py
│ │ ├── test_charge_generation.py
│ │ ├── test_gather/
│ │ │ ├── test_abfe_full_results_multiple_units_dg_.tsv
│ │ │ ├── test_abfe_full_results_multiple_units_raw_.tsv
│ │ │ ├── test_abfe_full_results_single_unit_dg_.tsv
│ │ │ ├── test_abfe_full_results_single_unit_raw_.tsv
│ │ │ ├── test_abfe_single_repeat_multiple_units_dg_.tsv
│ │ │ ├── test_abfe_single_repeat_multiple_units_raw_.tsv
│ │ │ ├── test_abfe_single_repeat_single_unit_dg_.tsv
│ │ │ ├── test_abfe_single_repeat_single_unit_raw_.tsv
│ │ │ ├── test_cmet_failed_edge_ddg_.tsv
│ │ │ ├── test_cmet_failed_edge_raw_.tsv
│ │ │ ├── test_cmet_full_results_ddg_.tsv
│ │ │ ├── test_cmet_full_results_dg_.tsv
│ │ │ ├── test_cmet_full_results_raw_.tsv
│ │ │ ├── test_cmet_missing_all_complex_legs_allow_partial_ddg_.tsv
│ │ │ ├── test_cmet_missing_all_complex_legs_fail_ddg_.tsv
│ │ │ ├── test_cmet_missing_all_complex_legs_fail_dg_.tsv
│ │ │ ├── test_cmet_missing_complex_leg_ddg_.tsv
│ │ │ ├── test_cmet_missing_complex_leg_dg_.tsv
│ │ │ ├── test_cmet_missing_complex_leg_raw_.tsv
│ │ │ ├── test_cmet_missing_edge_ddg_.tsv
│ │ │ ├── test_cmet_missing_edge_dg_.tsv
│ │ │ ├── test_cmet_missing_edge_raw_.tsv
│ │ │ ├── test_septop_full_results_ddg_current_.tsv
│ │ │ ├── test_septop_full_results_ddg_pre_openfe_v1_11_.tsv
│ │ │ ├── test_septop_full_results_dg_current_.tsv
│ │ │ ├── test_septop_full_results_dg_pre_openfe_v1_11_.tsv
│ │ │ ├── test_septop_full_results_raw_current_.tsv
│ │ │ ├── test_septop_full_results_raw_pre_openfe_v1_11_.tsv
│ │ │ ├── test_septop_single_repeat_ddg_current_.tsv
│ │ │ ├── test_septop_single_repeat_ddg_pre_openfe_v1_11_.tsv
│ │ │ ├── test_septop_single_repeat_dg_current_.tsv
│ │ │ ├── test_septop_single_repeat_dg_pre_openfe_v1_11_.tsv
│ │ │ ├── test_septop_single_repeat_raw_current_.tsv
│ │ │ └── test_septop_single_repeat_raw_pre_openfe_v1_11_.tsv
│ │ ├── test_gather.py
│ │ ├── test_ligand_network_viewer.py
│ │ ├── test_plan_rbfe_network.py
│ │ ├── test_plan_rhfe_network.py
│ │ ├── test_quickrun.py
│ │ └── test_test.py
│ ├── conftest.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── bad_transformation.json
│ │ ├── rbfe_tutorial/
│ │ │ ├── __init__.py
│ │ │ ├── tyk2_ligands.sdf
│ │ │ └── tyk2_protein.pdb
│ │ └── transformation.json
│ ├── dev/
│ │ ├── __init__.py
│ │ └── write_transformation_json.py
│ ├── parameters/
│ │ ├── __init__.py
│ │ ├── test_mapper.py
│ │ ├── test_mol.py
│ │ ├── test_molecules.py
│ │ ├── test_output.py
│ │ ├── test_output_dir.py
│ │ ├── test_plan_network_options.py
│ │ ├── test_protein.py
│ │ └── test_utils.py
│ ├── test_cli.py
│ ├── test_fetchables.py
│ ├── test_fetching.py
│ ├── test_plugins.py
│ ├── test_rbfe_tutorial.py
│ ├── test_utils.py
│ └── utils.py
└── utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .dockerignore
================================================
# Ignore everything
*
# Only allow
!production/environment.yml
================================================
FILE: .git-blame-ignore-revs
================================================
# https://github.com/OpenFreeEnergy/openfe/pull/1604 - ruff formatting part 1
3a08b6809fc57662e4146db3c7ccedfbc7c7c8df
# https://github.com/OpenFreeEnergy/openfe/pull/1610 - ruff formatting part 2
2311a2f2d956dd30e95c180841ce19b921d89e1f
# https://github.com/OpenFreeEnergy/openfe/pull/1622 - ruff formatting part 3
d7196d119e2f65d88e488afc665f2521e4f68042
# https://github.com/OpenFreeEnergy/openfe/pull/1623 - ruff formatting part 4
036869ae81670c6dcfa2532f125ee88c3f35936c
# https://github.com/OpenFreeEnergy/openfe/pull/1665 - ruff isort
588f552ca9200a99fd77aed993ea3766b154ee53
# https://github.com/OpenFreeEnergy/openfe/pull/1667 - ruff f-strings and whitespace
18f211db974cdde38a5d88d6e74aaaf78fda8897
# https://github.com/OpenFreeEnergy/openfe/pull/1668 - ruff pycodestyle changes
b693d37c8ac0e30283bd8b5f13386fdc98901cf8
================================================
FILE: .gitattributes
================================================
================================================
FILE: .github/CONTRIBUTING.md
================================================
# Contributing to OpenFE
Thanks for contributing to the OpenFE software project!
Read our [code of conduct](../Code_of_Conduct.md) to understand the standards you must adhere to.
## Questions
If you have any questions on using the OpenFE package, reach out on the "Discussions" tab above to start a conversation!
We are happy to get you started in using our software.
## Issues
If you think you have encountered a software issue, please raise this on the "Issues" tab in GitHub.
In general, the more details you can provide, the better.
We recommend reading section 3.3 of [this article](https://livecomsjournal.org/index.php/livecoms/article/view/v3i1e1473)
to understand the problem-solving process.
## Contributing
We welcome any fixes or code contributions.
Note that any contributions made must be made under the MIT license.
Feel free to reach out to the developer team who can assist you in this process.
================================================
FILE: .github/PULL_REQUEST_TEMPLATE/release_template.md
================================================
Make the PR:
* [ ] Create a new release prep branch corresponding to the version name, e.g. `release/v1.2.0`. Note: please follow [semantic versioning](https://semver.org/).
* [ ] Check that all user-relevant updates are included in the `news/` rever `.rst` files. You can backfill any additional items by making a new .rst, e.g. `backfill.rst`
* [ ] Run [rever](https://regro.github.io/rever-docs/index.html#), e.g. `rever 1.2.0`. This will auto-commit `docs/CHANGELOG.rst` and remove the `.rst` files from `news/`.
* [ ] Verify that `docs/CHANGELOG.rst` looks correct and that it renders as expected in the docs preview.
* [ ] If needed, create a release of the [example notebooks repository](https://github.com/OpenFreeEnergy/ExampleNotebooks) and update the pinned release version in the `openfe/docs/conf.py`.
* [ ] Make the PR and verify that CI/CD passes.
* [ ] [feedstock packaging tests](https://github.com/OpenFreeEnergy/openfe/actions/workflows/release-prep-feedstock.yaml)
* [ ] [example notebooks](https://github.com/OpenFreeEnergy/openfe/actions/workflows/release-prep-examplenotebooks.yaml)
* [ ] [GPU tests](https://github.com/OpenFreeEnergy/openfe/actions/workflows/aws-gpu-integration-tests.yaml)
* [ ] Merge the PR into `main`.
After merging the PR, [follow this guide](https://github.com/OpenFreeEnergy/openfe/wiki/How-to-create-a-new-release).
================================================
FILE: .github/pull_request_template.md
================================================
Checklist
* [ ] All new code is appropriately documented (user-facing code _must_ have complete docstrings).
* [ ] Added a ``news`` entry, or the changes are not user-facing.
* [ ] Ran pre-commit: you can run [pre-commit](https://pre-commit.com) locally or comment on this PR with `pre-commit.ci autofix`.
Manual Tests: these are slow, so they don't need to be run on every commit, only before merging and when relevant changes are made (generally at the reviewer's discretion).
* [ ] [GPU integration tests](https://github.com/OpenFreeEnergy/openfe/actions/workflows/aws-gpu-integration-tests.yaml)
* [ ] [example notebook testing](https://github.com/OpenFreeEnergy/openfe/actions/workflows/release-prep-examplenotebooks.yaml)
* [ ] [packaging tests](https://github.com/OpenFreeEnergy/openfe/actions/workflows/cron-package-test.yaml): run this for any large feature PRs or PRs that add test data.
## Developers certificate of origin
- [ ] I certify that this contribution is covered by the MIT License [here](https://github.com/OpenFreeEnergy/openfe/blob/main/LICENSE) and the **Developer Certificate of Origin** at <https://developercertificate.org/>.
================================================
FILE: .github/workflows/aws-cpu-long-tests.yaml
================================================
name: "manual AWS: CPU long tests"
on:
workflow_dispatch:
jobs:
start-aws-runner:
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
outputs:
mapping: ${{ steps.aws-start.outputs.mapping }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::010438489691:role/GHARunnerAWS
aws-region: us-east-2
- name: Create cloud runner
id: aws-start
uses: omsf/start-aws-gha-runner@v1.0.0
with:
aws_image_id: ami-0b7f661c228e6a4bb
aws_instance_type: c7i.xlarge
aws_home_dir: /home/ubuntu
aws_root_device_size: 125
env:
GH_PAT: ${{ secrets.GH_PAT }}
self-hosted-test:
runs-on: self-hosted
timeout-minutes: 720 # 12 hours
defaults:
run:
shell: bash -leo pipefail {0}
env:
OE_LICENSE: ${{ github.workspace }}/oe_license.txt
needs:
- start-aws-runner
steps:
- uses: actions/checkout@v4
- name: Print disk usage
run: "df -h"
- name: Print Docker details
run: "docker version || true"
- name: "Setup Micromamba"
uses: mamba-org/setup-micromamba@v2
with:
environment-file: environment.yml
environment-name: openfe_env
condarc: |
channels:
- conda-forge
- openeye
create-args: >-
espaloma_charge==0.0.8
espaloma==0.4.0
openeye-toolkits
python=3.12
- name: "Check if OpenMM can get a GPU"
run: python -m openmm.testInstallation
- name: "Install"
run: python -m pip install --no-deps -e .
- name: "Test imports"
run: |
# if we add more to this, consider changing to for + env vars
python -Ic "import openfe; print(openfe.__version__)"
- name: "Environment Information"
run: |
micromamba info
micromamba list
pip list
- name: Test OE License & Write License to File
env:
OE_LICENSE_TEXT: ${{ secrets.OE_LICENSE }}
run: |
echo "${OE_LICENSE_TEXT}" > ${OE_LICENSE}
python -c "import openeye; assert openeye.oechem.OEChemIsLicensed(), 'OpenEye license checks failed!'"
- name: "Run tests"
env:
OFE_SLOW_TESTS: "true"
DUECREDIT_ENABLE: 'yes'
OFE_INTEGRATION_TESTS: FALSE
run: |
pytest -n logical -vv --durations=10 --runslow src/openfecli/tests/ src/openfe/tests/
stop-aws-runner:
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
needs:
- start-aws-runner
- self-hosted-test
if: ${{ always() }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::010438489691:role/GHARunnerAWS
aws-region: us-east-2
- name: Stop instances
uses: omsf/stop-aws-gha-runner@v1.0.0
with:
instance_mapping: ${{ needs.start-aws-runner.outputs.mapping }}
env:
GH_PAT: ${{ secrets.GH_PAT }}
================================================
FILE: .github/workflows/aws-gpu-integration-tests.yaml
================================================
name: "manual AWS: GPU integration tests"
on:
workflow_dispatch:
jobs:
start-aws-runner:
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
outputs:
mapping: ${{ steps.aws-start.outputs.mapping }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::010438489691:role/GHARunnerAWS
aws-region: us-east-2
- name: Create cloud runner
id: aws-start
uses: omsf/start-aws-gha-runner@v1.0.0
with:
aws_image_id: ami-076a54ed41e67782d
aws_instance_type: g4dn.xlarge
aws_home_dir: /home/ubuntu
aws_root_device_size: 125
env:
GH_PAT: ${{ secrets.GH_PAT }}
self-hosted-test:
runs-on: self-hosted
timeout-minutes: 720 # 12 hours
defaults:
run:
shell: bash -leo pipefail {0}
env:
OE_LICENSE: ${{ github.workspace }}/oe_license.txt
needs:
- start-aws-runner
steps:
- uses: actions/checkout@v4
- name: Print disk usage
run: "df -h"
- name: Print Docker details
run: "docker version || true"
- name: Check for nvidia-smi
run: "nvidia-smi"
- name: "Setup Micromamba"
uses: mamba-org/setup-micromamba@v2
with:
environment-file: environment.yml
environment-name: openfe_env
condarc: |
channels:
- conda-forge
- openeye
create-args: >-
espaloma_charge==0.0.8
espaloma==0.4.0
openeye-toolkits
python=3.12
cuda-version=12.8
- name: "Check if OpenMM can get a GPU"
run: python -m openmm.testInstallation
- name: "Install"
run: python -m pip install --no-deps -e .
- name: "Test imports"
run: |
# if we add more to this, consider changing to for + env vars
python -Ic "import openfe; print(openfe.__version__)"
- name: "Environment Information"
run: |
micromamba info
micromamba list
pip list
- name: Test OE License & Write License to File
env:
OE_LICENSE_TEXT: ${{ secrets.OE_LICENSE }}
run: |
echo "${OE_LICENSE_TEXT}" > ${OE_LICENSE}
python -c "import openeye; assert openeye.oechem.OEChemIsLicensed(), 'OpenEye license checks failed!'"
- name: "Run tests"
env:
DUECREDIT_ENABLE: 'yes'
OFE_INTEGRATION_TESTS: TRUE
run: |
# The -m flag will only run tests with @pytest.mark.integration
pytest -n logical -vv --durations=10 -m integration src/openfecli/tests/ src/openfe/tests/
stop-aws-runner:
runs-on: ubuntu-latest
permissions:
id-token: write
contents: read
needs:
- start-aws-runner
- self-hosted-test
if: ${{ always() }}
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::010438489691:role/GHARunnerAWS
aws-region: us-east-2
- name: Stop instances
uses: omsf/stop-aws-gha-runner@v1.0.0
with:
instance_mapping: ${{ needs.start-aws-runner.outputs.mapping }}
env:
GH_PAT: ${{ secrets.GH_PAT }}
================================================
FILE: .github/workflows/ci.yaml
================================================
name: "CI"
on:
pull_request:
# Skip CI if changed files only affect the following folders
# - docs: documentation changes don't need code validation
# See here for more details: https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#example-excluding-paths
paths-ignore:
- "docs/*"
- "news/*"
- ".readthedocs.yaml"
- ".github/workflows/cpu-long-tests.yaml"
- ".github/workflows/gpu-integration-tests.yaml"
push:
branches:
- main
schedule:
- cron: "0 4 * * *"
workflow_dispatch:
release:
types:
- published
concurrency:
group: "${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: true
defaults:
run:
shell: bash -leo pipefail {0}
jobs:
tests:
runs-on: ${{ matrix.os }}
name: "💻-${{matrix.os }} 🐍-${{ matrix.python-version }} oechem: ${{ matrix.openeye }}"
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest"]
openeye: ["no"]
python-version:
- "3.11"
- "3.12"
- "3.13"
include:
- os: "ubuntu-latest"
python-version: "3.13"
openeye: "yes"
- os: "macos-latest"
python-version: "3.12"
openeye: "no"
env:
OE_LICENSE: ${{ github.workspace }}/oe_license.txt
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Get current date
id: date
run: echo "date=$(date +%Y-%m-%d)" >> "${GITHUB_OUTPUT}"
- name: "Setup Micromamba"
if: ${{ matrix.python-version != '3.13' }}
uses: mamba-org/setup-micromamba@v2
with:
environment-file: environment.yml
environment-name: openfe_env
cache-environment: true
cache-downloads: true
cache-environment-key: environment-${{ steps.date.outputs.date }}
cache-downloads-key: downloads-${{ steps.date.outputs.date }}
create-args: >-
espaloma=0.4.0
python=${{ matrix.python-version }}
pydantic=${{ matrix.pydantic-version }}
init-shell: bash
- name: "Setup Micromamba"
# Can't install espaloma with python 3.13
if: ${{ matrix.python-version == '3.13' }}
uses: mamba-org/setup-micromamba@v2
with:
environment-file: environment.yml
environment-name: openfe_env
cache-environment: true
cache-downloads: true
cache-environment-key: environment-${{ steps.date.outputs.date }}
cache-downloads-key: downloads-${{ steps.date.outputs.date }}
create-args: >-
python=${{ matrix.python-version }}
pydantic=${{ matrix.pydantic-version }}
init-shell: bash
- name: "Install OpenEye"
if: ${{ !github.event.pull_request.head.repo.fork
&& matrix.openeye == 'yes' }}
env:
OE_LICENSE_TEXT: ${{ secrets.OE_LICENSE }}
run: |
echo "${OE_LICENSE_TEXT}" > ${OE_LICENSE}
micromamba install -c openeye openeye-toolkits
python -c "import openeye; assert openeye.oechem.OEChemIsLicensed(), 'oechem license check failed!'"
- name: "Install"
run: python -m pip install --no-deps -e .
- name: "Environment Information"
run: |
micromamba info
micromamba list
pip list
- name: "Test imports"
run: |
# if we add more to this, consider changing to for + env vars
python -Ic "import openfe; print(openfe.__version__)"
- name: Cache Pooch data
uses: actions/cache@v4
with:
path: |
# linux cache location
~/.cache/openfe
# osx cache location
~/Library/Caches/openfe
# When files are added or changed in a pooch registry
# bump this key to create a new cache, for example if
# the key is pooch-${{ matrix.os }}-1 change it to pooch-${{ matrix.os }}-2
key: pooch-${{ matrix.os }}-1
- name: "Run tests"
env:
# Set OFE_SLOW_TESTS to "true" for every trigger except pull requests (push, schedule, release, manual dispatch)
OFE_SLOW_TESTS: ${{ fromJSON('{"false":"false","true":"true"}')[github.event_name != 'pull_request'] }}
DUECREDIT_ENABLE: 'yes'
run: |
pytest -n auto -v --cov=openfe --cov=openfecli --cov-report=xml --durations=10
- name: codecov-pr
if: ${{ github.repository == 'OpenFreeEnergy/openfe'
&& github.event_name == 'pull_request' }}
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: coverage.xml
fail_ci_if_error: False
verbose: True
flags: fast-tests
- name: codecov-merge
# we only want to upload a slow report if
# 1) it isn't a schedule run
# 2) it wasn't from a PR (we don't run slow tests on PRs)
if: ${{ github.repository == 'OpenFreeEnergy/openfe'
&& github.event_name != 'schedule'
&& github.event_name != 'pull_request' }}
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: coverage.xml
fail_ci_if_error: False
verbose: True
flags: slow-tests
================================================
FILE: .github/workflows/clean-pr-caches.yaml
================================================
# from https://docs.github.com/en/actions/how-tos/manage-workflow-runs/manage-caches#force-deleting-cache-entries
name: "clean up github runner caches on closed pull requests"
on:
workflow_dispatch:
pull_request:
types:
- closed
jobs:
cleanup:
runs-on: ubuntu-latest
permissions:
actions: write
steps:
- name: Cleanup
run: |
echo "Fetching list of cache keys"
cacheKeysForPR=$(gh cache list --ref $BRANCH --limit 100 --json id --jq '.[].id')
## Setting this to not fail the workflow while deleting cache keys.
set +e
echo "Deleting caches..."
for cacheKey in $cacheKeysForPR
do
gh cache delete $cacheKey
done
echo "Done"
env:
GH_TOKEN: ${{ github.token }}
GH_REPO: ${{ github.repository }}
BRANCH: refs/pull/${{ github.event.pull_request.number }}/merge
================================================
FILE: .github/workflows/cron-conda.yaml
================================================
# Installs the most recent openfe release from conda-forge into a fresh
# environment and runs the fast test suite against it, once per matrix entry.
name: "cron: conda builds daily tests"

on:
  workflow_dispatch:
  schedule:
    # At 05:00 UTC every day
    - cron: "0 5 * * *"

concurrency:
  group: "${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: true

defaults:
  run:
    shell: bash -leo pipefail {0}

jobs:
  condacheck:
    # Use the matrix key exactly as it is declared below (`os`).
    runs-on: ${{ matrix.os }}
    name: "daily conda check"
    strategy:
      fail-fast: false
      matrix:
        os: ['ubuntu-latest']
        python-version:
          - "3.11"
          - "3.12"
          - "3.13"
        include:
          - os: "macos-latest"
            python-version: "3.12"
            openeye: "no"

    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          path: openfe_repo

      # Looks up the tag of the latest GitHub release and strips a leading "v"
      # so that the value can be used as a conda version pin.
      - name: Get Latest Version
        id: latest-version
        working-directory: openfe_repo
        run: |
          REPO="${{ github.repository }}"
          VERSION=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
            "https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name | ltrimstr("v")')
          echo $VERSION
          # jq prints "null" when the response has no tag_name (e.g. an API
          # error); stop here instead of asking the solver for "openfe=null".
          if [ -z "$VERSION" ] || [ "$VERSION" = "null" ]; then
            echo "Could not determine the latest release version of $REPO" >&2
            exit 1
          fi
          echo "VERSION=$VERSION" >> $GITHUB_OUTPUT

      - name: Setup Micromamba and Install openfe
        uses: mamba-org/setup-micromamba@v2
        with:
          environment-name: openfe
          create-args: >-
            python=${{ matrix.python-version }}
            openfe=${{ steps.latest-version.outputs.VERSION }}
            pytest
            pytest-xdist
          condarc: |
            channels:
              - conda-forge
          init-shell: bash

      - name: "env info"
        run: |
          micromamba info
          micromamba list

      - id: run-tests
        name: "Run tests"
        run: |
          # note: this only runs the fast tests
          pytest -n auto --pyargs openfe openfecli
================================================
FILE: .github/workflows/cron-docker.yaml
================================================
name: "cron: docker image daily tests"
on:
push:
branches:
- main
schedule:
# weekly tests: 14:00 UTC every Sunday
- cron: "0 14 * * 0"
workflow_dispatch:
defaults:
run:
shell: bash -leo pipefail {0}
env:
REGISTRY: ghcr.io
IMAGE_NAME: openfreeenergy/openfe
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Use dev tag for nightly builds
id: latest-version
run: |
VERSION=dev
echo $VERSION
echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
- name: Print Latest Version
run: echo ${{ steps.latest-version.outputs.VERSION }}
- name: Create fully qualified image registry path
id: fqirp
run: |
FQIRP=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.latest-version.outputs.VERSION }}
echo "FQIRP=$FQIRP" >> $GITHUB_OUTPUT
- name: Print FQIRP
run: echo ${{ steps.fqirp.outputs.FQIRP }}
- name: Log in to the Container registry
uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=schedule,pattern=nightly,enable=true,priority=1000
type=ref,event=branch,enable=true,priority=600
type=ref,event=tag,enable=true,priority=600
type=ref,event=pr,prefix=pr-,enable=true,priority=600
type=semver,pattern={{major}}.{{minor}}
type=semver,pattern={{version}}
type=sha
${{ steps.latest-version.outputs.VERSION }}
- name: Build and export to Docker
uses: docker/build-push-action@v6
with:
context: .
file: production/Dockerfile
load: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Test image
run: |
docker run --rm ${{ steps.fqirp.outputs.FQIRP }} openfe --help
docker run --rm ${{ steps.fqirp.outputs.FQIRP }} openfe --version
docker run --rm ${{ steps.fqirp.outputs.FQIRP }} python -c "import gufe; print(f'{gufe.__version__=}')"
docker run --rm ${{ steps.fqirp.outputs.FQIRP }} pytest --pyargs gufe -v
docker run --rm ${{ steps.fqirp.outputs.FQIRP }} pytest --pyargs openfe -v
================================================
FILE: .github/workflows/cron-feedstock-build-tests.yaml
================================================
# tests this openfe commit and gufe main to check for
# conda-feedstock build issues
name: "cron: weekly feedstock package build tests"
concurrency:
group: "${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: true
defaults:
run:
shell: bash -leo pipefail {0}
on:
workflow_dispatch:
schedule:
# 3 am weekly on monday
- cron: "0 3 * * MON"
jobs:
test-conda-build:
runs-on: ubuntu-latest
steps:
- name: Checkout openfe repository
uses: actions/checkout@v4
with:
path: openfe
- name: Checkout conda-forge feedstock
uses: actions/checkout@v4
with:
repository: conda-forge/openfe-feedstock
path: openfe-feedstock
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'
- name: Install conda-build dependencies
run: |
pip install pyyaml
# TODO just checkout the repo where we need it?
- name: Copy source code to recipe folder
run: cp -r openfe openfe-feedstock/recipe/openfe_source
- name: Modify feedstock to use local path
run: |
cd openfe-feedstock
# Backup original recipe.yaml
cp recipe/recipe.yaml recipe/recipe.yaml.bak
# NOTE: now that we use v1 feedstock, we can use yq to directly parse the YAML here.
# Add path after source: and delete url line
sed -i '/^source:/a\ path: ./openfe_source' recipe/recipe.yaml
sed -i '/^ url:/d' recipe/recipe.yaml
echo "Modified recipe.yaml:"
cat recipe/recipe.yaml
- name: Run conda-forge build test
run: |
cd openfe-feedstock
python build-locally.py
continue-on-error: true
id: build_test
# Uncomment if build_artifacts is needed to troubleshoot build
# - name: Upload build logs
# if: always()
# uses: actions/upload-artifact@v4
# with:
# name: conda-build-logs
# path: |
# openfe-feedstock/build_artifacts/
# openfe-feedstock/recipe/recipe.yaml
# openfe-feedstock/recipe/recipe.yaml.bak
# if-no-files-found: warn
- name: Check build status
if: steps.build_test.outcome == 'failure'
run: |
echo "❌ Conda forge build test failed. Check the uploaded logs for details."
exit 1
================================================
FILE: .github/workflows/cron-package-test.yaml
================================================
name: "cron: package install daily tests"
on:
workflow_dispatch:
schedule:
# At 03:00 UTC daily
- cron: "0 3 * * *"
concurrency:
group: "${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: true
defaults:
run:
shell: bash -leo pipefail {0}
jobs:
package-tests:
runs-on: ubuntu-latest
name: "main branch long tests"
steps:
- uses: actions/checkout@v4
- name: Get current date
id: date
run: echo "date=$(date +%Y-%m-%d)" >> "${GITHUB_OUTPUT}"
- name: "Setup Micromamba"
uses: mamba-org/setup-micromamba@v2
with:
environment-file: environment.yml
environment-name: openfe_env
cache-environment: true
cache-downloads: true
cache-environment-key: environment-${{ steps.date.outputs.date }}
cache-downloads-key: downloads-${{ steps.date.outputs.date }}
create-args: >-
python=3.12
init-shell: bash
- name: "install extra deps"
run: pip install pipx wheel twine readme-renderer
- name: "build sdist"
run: pipx run build --sdist --outdir dist
- name: "check package build"
run: |
dist=$(ls -t1 dist/openfe-*tar.gz | head -n1)
test -n "${dist}" || { echo "no distribution found"; exit 1; }
twine check $dist
- name: "install from source dist"
working-directory: ./dist
run: python -m pip install openfe-*tar.gz
- name: "run tests"
working-directory: ./dist
env:
OFE_SLOW_TESTS: "true"
run: |
pytest -n auto -v --pyargs openfe.tests
pytest -n auto -v --pyargs openfecli.tests
================================================
FILE: .github/workflows/griffe-api-break.yaml
================================================
# Runs `griffe check` for the openfe and openfecli packages against origin/main
# and reports the outcome as a single, self-replacing comment on the pull request.
name: "PR: griffe check for API breaks"

on:
  pull_request_target:
    branches:
      - main

jobs:
  check:
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write

    steps:
      # NOTE(review): under `pull_request_target` with no explicit `ref`,
      # checkout presumably fetches the base branch rather than the PR head,
      # in which case the comparison below never sees the PR's changes -
      # confirm which revision is meant to be analysed.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - run: git fetch --depth=1 --tags

      - uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      # Allowed to fail: the outcome is read by the next step instead of
      # failing the job.
      - name: Check for API breaks
        continue-on-error: true
        id: check
        run: |
          pip install griffe
          griffe check "openfe" -s src --verbose -a origin/main
          griffe check "openfecli" -s src --verbose -a origin/main

      - name: Manage PR Comments
        uses: actions/github-script@v7
        with:
          script: |
            const prNumber = context.payload.pull_request.number;
            // Hidden HTML marker embedded in every comment this workflow posts.
            // It must be a non-empty, unique string: String.includes('') is
            // true for any text, so an empty marker would make the cleanup
            // loop below delete every comment on the pull request.
            const identifier = '<!-- api-break-check -->';
            const runUrl = `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
            const stepUrl = `${runUrl}#step:check`;

            // Determine the outcome of the check step
            const checkStepOutcome = '${{ steps.check.outcome }}';

            // List existing comments
            const { data: comments } = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
            });

            // Delete previous comments from this action (only those carrying the marker)
            for (const comment of comments) {
              if (comment.body.includes(identifier)) {
                await github.rest.issues.deleteComment({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  comment_id: comment.id,
                });
              }
            }

            // Post a fresh comment reflecting the outcome of the check step
            if (checkStepOutcome === 'failure') {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body: `${identifier}\n🚨 API breaking changes detected! 🚨\n[View logs for this step](${stepUrl})`
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body: `${identifier}\nNo API break detected ✅`
              });
            }
================================================
FILE: .github/workflows/mypy.yaml
================================================
name: "PR: mypy static type checking"
on:
pull_request:
branches:
- main
push:
branches:
- main
concurrency:
group: "${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: true
defaults:
run:
shell: bash -leo pipefail {0}
jobs:
mypy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Get current date
id: date
run: echo "date=$(date +%Y-%m-%d)" >> "${GITHUB_OUTPUT}"
- name: "Setup Micromamba"
uses: mamba-org/setup-micromamba@v2
with:
environment-file: environment.yml
environment-name: openfe_env
cache-environment: true
cache-downloads: true
cache-environment-key: environment-${{ steps.date.outputs.date }}
cache-downloads-key: downloads-${{ steps.date.outputs.date }}
create-args: >-
python=3.12
mypy>=1.17.0
init-shell: bash
- name: "Install steps"
run: |
python -m pip install --no-deps -e .
- name: "Environment Information"
run: |
micromamba info
micromamba list
- name: "Lint with mypy"
run: mypy
================================================
FILE: .github/workflows/release-docker-image.yaml
================================================
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# GitHub recommends pinning actions to a commit SHA.
# To get a newer version, you will need to update the SHA.
# You can also reference a tag or branch, but the action may change without warning.
# Workflow to automate docker image building during the openfe release process.
name: "release: create and publish a docker image"
on:
workflow_dispatch:
defaults:
run:
shell: bash -leo pipefail {0}
env:
REGISTRY: ghcr.io
IMAGE_NAME: openfreeenergy/openfe
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
# see https://dev.to/mathio/squeezing-disk-space-from-github-actions-runners-an-engineers-guide-3pjg
- name: Aggressive cleanup
run: |
# remove unneeded packages to avoid running out of disk space
# Remove Java (JDKs)
sudo rm -rf /usr/lib/jvm
# Remove .NET SDKs
sudo rm -rf /usr/share/dotnet
# Remove Swift toolchain
sudo rm -rf /usr/share/swift
# Remove Haskell (GHC)
sudo rm -rf /usr/local/.ghcup
# Remove Julia
sudo rm -rf /usr/local/julia*
# Remove Android SDKs
sudo rm -rf /usr/local/lib/android
# Remove Chromium (optional if not using for browser tests)
sudo rm -rf /usr/local/share/chromium
# Remove Microsoft/Edge and Google Chrome builds
sudo rm -rf /opt/microsoft /opt/google
# Remove Azure CLI
sudo rm -rf /opt/az
# Remove PowerShell
sudo rm -rf /usr/local/share/powershell
# Remove CodeQL and other toolcaches
sudo rm -rf /opt/hostedtoolcache
docker system prune -af || true
docker builder prune -af || true
df -h
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Get Latest Version
id: latest-version
run: |
REPO="${{ github.repository }}"
VERSION=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
"https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name | ltrimstr("v")')
echo $VERSION
echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
- name: Print Latest Version
run: echo ${{ steps.latest-version.outputs.VERSION }}
- name: Create fully qualified image registry path
id: fqirp
run: |
FQIRP=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.latest-version.outputs.VERSION }}
echo "FQIRP=$FQIRP" >> $GITHUB_OUTPUT
- name: Print FQIRP
run: echo ${{ steps.fqirp.outputs.FQIRP }}
- name: Log in to the Container registry
uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=schedule,pattern=nightly,enable=true,priority=1000
type=ref,event=branch,enable=true,priority=600
type=ref,event=tag,enable=true,priority=600
type=ref,event=pr,prefix=pr-,enable=true,priority=600
type=semver,pattern={{major}}.{{minor}}
type=semver,pattern={{version}}
type=sha
${{ steps.latest-version.outputs.VERSION }}
- name: Build and export to Docker
uses: docker/build-push-action@v6
with:
context: .
file: production/Dockerfile
load: true
push: false
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
VERSION=${{ steps.latest-version.outputs.VERSION }}
- name: Test image
run: |
docker run --rm ${{ steps.fqirp.outputs.FQIRP }} openfe --help
docker run --rm ${{ steps.fqirp.outputs.FQIRP }} openfe --version
docker run --rm ${{ steps.fqirp.outputs.FQIRP }} python -c "import gufe; print(f'{gufe.__version__=}')"
docker run --rm ${{ steps.fqirp.outputs.FQIRP }} pytest --pyargs gufe -v
docker run --rm ${{ steps.fqirp.outputs.FQIRP }} pytest --pyargs openfe openfecli -v
- name: Push Docker image
uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
with:
context: .
file: production/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
build-args: |
VERSION=${{ steps.latest-version.outputs.VERSION }}
- name: Setup Apptainer
uses: eWaterCycle/setup-apptainer@v2
with:
apptainer-version: 1.3.4
- name: Build Apptainer Image
run: singularity build openfe_${{ steps.latest-version.outputs.VERSION }}.sif docker-daemon:${{ steps.fqirp.outputs.FQIRP }}
- name: Test & Push Apptainer Image
run: |
mkdir test_apptainer
cd test_apptainer
singularity run ../openfe_${{ steps.latest-version.outputs.VERSION }}.sif openfe --help
singularity run ../openfe_${{ steps.latest-version.outputs.VERSION }}.sif openfe --version
singularity run ../openfe_${{ steps.latest-version.outputs.VERSION }}.sif pytest --pyargs openfe openfecli -v -n auto
echo ${{ secrets.GITHUB_TOKEN }} | singularity remote login -u ${{ secrets.GHCR_USERNAME }} --password-stdin oras://ghcr.io
singularity push ../openfe_${{ steps.latest-version.outputs.VERSION }}.sif oras://${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.latest-version.outputs.VERSION }}-apptainer
singularity push ../openfe_${{ steps.latest-version.outputs.VERSION }}.sif oras://${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest-apptainer
================================================
FILE: .github/workflows/release-installers.yaml
================================================
name: "release: make single-file installers"
on:
workflow_dispatch:
defaults:
run:
shell: bash -leo pipefail {0}
jobs:
test:
name: Building single file installer on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [macos-latest, ubuntu-latest]
steps:
- name: Checkout Code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Get Latest Version
id: latest-version
run: |
REPO="${{ github.repository }}"
VERSION=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
"https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name | ltrimstr("v")')
echo $VERSION
echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
- name: Install constructor environment with Micromamba
uses: mamba-org/setup-micromamba@v2
with:
environment-name: installer
create-args: >-
python=3.11
jinja2
constructor
init-shell: bash
- name: Create installer
run: VERSION=${{ steps.latest-version.outputs.VERSION }} constructor devtools/installer/
- name: Get installer file name
id: file-name
run: |
# This should work as long as we don't have any *.sh files in our root dir
FILE_NAME=$(find * -maxdepth 0 -type f -name "*.sh")
echo $FILE_NAME
echo "FILE_NAME=$FILE_NAME" >> $GITHUB_OUTPUT
- name: Test installer
run: |
chmod +x ${{ steps.file-name.outputs.FILE_NAME }}
./${{ steps.file-name.outputs.FILE_NAME }} -b
export PATH="$HOME/openfeforge/bin:$PATH"
OFE_SLOW_TESTS=FALSE pytest -v --pyargs openfe
# Copy for "latest" release by removing version
# Inspired by https://github.com/conda-forge/miniforge/blob/main/.github/workflows/ci.yml
cp ${{ steps.file-name.outputs.FILE_NAME }} $(echo ${{ steps.file-name.outputs.FILE_NAME }} | sed -e 's/-[^-]*//')
- uses: actions/upload-artifact@v4
with:
name: ${{ steps.file-name.outputs.FILE_NAME }}
path: OpenFEforge*
if-no-files-found: error
- name: Upload openfe forge to release
uses: svenstaro/upload-release-action@v2
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: OpenFEforge*
tag: ${{ github.ref }}
overwrite: true
file_glob: true
if: startsWith(github.ref, 'refs/tags/')
================================================
FILE: .github/workflows/release-make-condalock.yaml
================================================
name: "release: create openfe conda-lock file"
on:
workflow_dispatch:
defaults:
run:
shell: bash -leo pipefail {0}
jobs:
create-conda-lock-file-and-test-linux:
runs-on: ubuntu-latest
steps:
- name: Install conda-lock with Micromamba
uses: mamba-org/setup-micromamba@v2
with:
environment-name: conda-lock
create-args: >-
conda-lock
# This saves me some time since we only need the latest tag
- name: Get latest tag
id: latest-version
run: |
REPO="${{ github.repository }}"
VERSION=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
"https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name | ltrimstr("v")')
echo $VERSION
echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
- name: Print Latest Version
run: echo ${{ steps.latest-version.outputs.VERSION }}
- name: Create environment file to lock
run: |
cat > environment-to-lock.yaml << 'EOF'
name: openfe_env
channels:
- conda-forge
platforms:
- linux-64
- osx-arm64
dependencies:
- openfe==${{ steps.latest-version.outputs.VERSION }}
- python=3.12
EOF
- name: Generate lock files
run: |
conda lock --with-cuda 11.8 -f environment-to-lock.yaml --lockfile openfe-conda-lock.yml
cp openfe-conda-lock.yml openfe-${{ steps.latest-version.outputs.VERSION }}-conda-lock.yml
- name: Test lock file (linux)
run: |
conda-lock install -p /home/runner/micromamba/envs/lf-test openfe-conda-lock.yml
micromamba activate /home/runner/micromamba/envs/lf-test
openfe test
- name: Upload file as artifact
uses: actions/upload-artifact@v4
with:
name: conda-lock-files
path: "*conda-lock.yml"
test-osx-lock-file:
needs: create-conda-lock-file-and-test-linux
runs-on: macos-latest
steps:
- name: Download artifact
uses: actions/download-artifact@v4
with:
name: conda-lock-files
- name: Install conda-lock with Micromamba
uses: mamba-org/setup-micromamba@v2
with:
environment-name: conda-lock
create-args: >-
conda-lock
- name: Test lock file (osx)
run: |
conda-lock install -p /Users/runner/micromamba/envs/lf-test openfe-conda-lock.yml
micromamba activate /Users/runner/micromamba/envs/lf-test
openfe test
- name: Upload lock files to release
uses: svenstaro/upload-release-action@v2
if: startsWith(github.ref, 'refs/tags/')
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
file: "*conda-lock.yml"
tag: ${{ github.ref }}
overwrite: true
file_glob: true
================================================
FILE: .github/workflows/release-prep-examplenotebooks.yaml
================================================
name: "release prep: test example notebooks"
on:
workflow_dispatch:
concurrency:
group: "${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: true
defaults:
run:
shell: bash -leo pipefail {0}
jobs:
test-example-notebooks:
runs-on: ubuntu-latest
steps:
- name: Checkout openfe repository
uses: actions/checkout@v4
with:
path: openfe
- name: Checkout example notebooks
uses: actions/checkout@v4
with:
repository: openfreeenergy/ExampleNotebooks
path: example-notebooks
- name: Setup Micromamba
uses: mamba-org/setup-micromamba@v2
with:
environment-file: openfe/environment.yml
environment-name: openfe_env
create-args: >-
python=3.12
nbval
init-shell: bash
- name: Install OpenFE
run: python -m pip install --no-deps -e ./openfe
- name: Environment Information
run: |
micromamba info
micromamba list
- name: Run example notebooks
run: |
cd example-notebooks
python -m pytest -v --nbval-lax --nbval-cell-timeout=3000 -n auto --dist loadscope
================================================
FILE: .github/workflows/release-prep-feedstock.yaml
================================================
# tests this openfe commit with the latest gufe release
# meant to be used for release prep to catch feedstock issues before releasing on github
name: "release prep: test conda-forge package build"
concurrency:
group: "${{ github.workflow }}-${{ github.ref }}"
cancel-in-progress: true
defaults:
run:
shell: bash -leo pipefail {0}
on:
workflow_dispatch:
# TODO: run when "release prep" label is added
jobs:
test-conda-build:
runs-on: ubuntu-latest
steps:
- name: Checkout openfe repository
uses: actions/checkout@v4
with:
path: openfe
- name: Checkout conda-forge feedstock
uses: actions/checkout@v4
with:
repository: conda-forge/openfe-feedstock
path: openfe-feedstock
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'
- name: Install conda-build dependencies
run: |
pip install pyyaml
# TODO just checkout the repo where we need it?
- name: Copy source code to recipe folder
run: cp -r openfe openfe-feedstock/recipe/openfe_source
- name: Get Latest gufe Version
id: latest-gufe-version
run: |
REPO="openfreeenergy/gufe"
VERSION=$(curl -s -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
"https://api.github.com/repos/$REPO/releases/latest" | jq -r '.tag_name | ltrimstr("v")')
echo $VERSION
echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
- name: Modify feedstock to use local path and latest gufe
uses: mikefarah/yq@master
with:
cmd: |
cd openfe-feedstock
# Backup original recipe.yaml
cp recipe/recipe.yaml recipe/recipe.yaml.bak
# Add path after 'source:' and delete url line
yq -i '.source.path="./openfe_source"' recipe/recipe.yaml
yq -i 'del(.source.url)' recipe/recipe.yaml
# remove existing gufe entry and add the gufe pin we want
yq -i 'del(.outputs.[0].requirements.run[] | select(. =="*gufe*"))' recipe/recipe.yaml
yq -i '.outputs.[0].requirements.run += "gufe==${{ steps.latest-gufe-version.outputs.VERSION }}"' recipe/recipe.yaml
echo "Modified recipe.yaml:"
cat recipe/recipe.yaml
- name: Run conda-forge build test
run: |
cd openfe-feedstock
python build-locally.py
continue-on-error: true
id: build_test
# Uncomment if build_artifacts is needed to troubleshoot build
# - name: Upload build logs
# if: always()
# uses: actions/upload-artifact@v4
# with:
# name: conda-build-logs
# path: |
# openfe-feedstock/build_artifacts/
# openfe-feedstock/recipe/recipe.yaml
# openfe-feedstock/recipe/recipe.yaml.bak
# if-no-files-found: warn
- name: Check build status
if: steps.build_test.outcome == 'failure'
run: |
echo "❌ Conda forge build test failed. Check the uploaded logs for details."
exit 1
================================================
FILE: .gitignore
================================================
# custom ignores
.duecredit.p
.xxrun
.idea/
.vscode/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*~
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
docs/reference/api/generated
docs/tutorials/*.png
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# vim
*.swp
# vscode
.vscode/
# Example notebooks
docs/ExampleNotebooks/
# duecredit
.duecredit.p
# Some charge stuff
*.model.pt
# Rever
rever/
================================================
FILE: .pre-commit-config.yaml
================================================
ci:
autoupdate_schedule: quarterly
# comment / label "pre-commit.ci autofix" to a pull request to manually trigger auto-fixing
autofix_prs: false
skip: []
submodules: false
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: check-added-large-files
args: ["--maxkb=900"]
- id: check-case-conflict
- id: check-executables-have-shebangs
- id: check-symlinks
- id: check-toml
- id: check-yaml
exclude: devtools/installer/construct.yaml # not a true YAML file
- id: debug-statements
- repo: https://github.com/tox-dev/pyproject-fmt
rev: "v2.21.0"
hooks:
- id: pyproject-fmt
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.9
hooks:
# Run the linter.
- id: ruff
args: [--fix ]
# Run the formatter.
- id: ruff-format
================================================
FILE: .readthedocs.yaml
================================================
version: 2
build:
os: "ubuntu-24.04"
tools:
python: "miniconda3-3.12-24.9"
sphinx:
configuration: docs/conf.py
fail_on_warning: true
conda:
environment: docs/environment.yaml
python:
# Install our python package before building the docs
install:
- method: pip
path: .
================================================
FILE: CITATION.cff
================================================
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: "Alibay"
given-names: "Irfan"
orcid: "https://orcid.org/0000-0001-5787-9130"
- family-names: "Gowers"
given-names: "Richard J."
orcid: "https://orcid.org/0000-0002-3241-1846"
- family-names: "Swenson"
given-names: "David W.H."
orcid: "https://orcid.org/0000-0001-9922-7923"
- family-names: "Henry"
given-names: "Michael M."
orcid: "https://orcid.org/0000-0002-3870-9993"
- family-names: "Ries"
given-names: "Benjamin"
orcid: "https://orcid.org/0000-0002-0945-8304"
- family-names: "Baumann"
given-names: "Hannah M."
orcid: "https://orcid.org/0000-0002-1736-7744"
- family-names: "Eastwood"
given-names: "James R. B."
orcid: "https://orcid.org/0000-0003-3895-5227"
- given-names: "Ashley"
family-names: "Mitchell"
orcid: 'https://orcid.org/0000-0002-8246-5113'
- given-names: "David"
family-names: "Dotson"
orcid: "https://orcid.org/0000-0001-5879-2942"
- given-names: Joshua T.
family-names: Horton
orcid: 'https://orcid.org/0000-0001-8694-7200'
- given-names: Matthew
family-names: Thompson
orcid: 'https://orcid.org/0000-0002-1460-3983'
- given-names: Alyssa
family-names: Travitz
orcid: 'https://orcid.org/0000-0001-5953-8807'
title: "The Open Free Energy library"
version: 1.7.0
date-released: 2025-04-25
url: "https://openfree.energy/"
repository-code: "https://github.com/openfreeEnergy/openfe"
doi: 10.5281/zenodo.8344247
================================================
FILE: Code_of_Conduct.md
================================================
## Code of Conduct ##
This project is dedicated to providing a welcoming and supportive environment for all people, regardless of background or identity. Members do not tolerate harassment for any reason, but especially harassment based on gender, sexual orientation, disability, physical appearance, body size, race, nationality, sex, color, ethnic or social origin, pregnancy, citizenship, familial status, veteran status, genetic information, religion or belief, political or any other opinion, membership of a national minority, property, age, or preference of text editor.
### Expected Behavior ###
All participants in our events and communications are expected to show respect and courtesy to others. All interactions should be professional regardless of platform: either online or in-person. In order to foster a positive and professional working environment we encourage the following kinds of behaviors in all work events, activities, and platforms:
* Use welcoming and inclusive language
* Be respectful of different viewpoints and experiences
* Gracefully accept constructive criticism
* Focus on what is best for the community
* Show courtesy and respect towards other community members
Note: See the [four social rules](https://www.recurse.com/manual#sub-sec-social-rules) for further recommendations.
### Unacceptable Behavior ###
Harassment is any form of behavior intended to exclude, intimidate, or cause discomfort. Prohibited harassing behavior includes, but is not limited to:
* written or verbal comments which have the effect of excluding people
* causing someone to fear for their safety, such as through stalking, following, or intimidating
* the display of sexual or violent images
* unwelcome sexual attention
* non-consensual or unwelcome physical contact
* sustained disruption of talks, events, or communications
* incitement to violence, suicide, or self-harm
* continuing to initiate interaction (including photography or recording) with someone after being asked to stop
and
* publication of private comment without consent
This list should not be taken to be exhaustive, but rather as a guide to make it easier to enrich our community and all those in which we participate. All interactions should be professional regardless of location: Harassment is prohibited whether it occurs online or offline, and the same standards apply to both.
Enforcement of this Code of Conduct will be respectful and not include any harassing behaviors.
You deserve sincere thanks for helping to make this a welcoming, friendly community for all.
This Code of Conduct was adapted from the [cmelab Code of Conduct](https://github.com/cmelab/getting-started/blob/master/wiki/pages/Code_of_Conduct.md).
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2022 OpenFreeEnergy
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: MANIFEST.in
================================================
recursive-include src/openfe/tests/data/ *.sdf
recursive-include src/openfe/tests/data/ *.bz2
recursive-include src/openfe/tests/data/ *.csv
recursive-include src/openfe/tests/data/ *.pdb
recursive-include src/openfe/tests/data/ *.mol2
recursive-include src/openfe/tests/data/ *.xml
recursive-include src/openfe/tests/data/ *.graphml
recursive-include src/openfe/tests/data/ *.edge
recursive-include src/openfe/tests/data/ *.dat
recursive-include src/openfe/tests/data/ *.txt
recursive-include src/openfe/tests/data/ *.gz
recursive-include src/openfe/tests/data/ *json_results.gz
include src/openfecli/tests/data/*.json
include src/openfecli/tests/data/*.tar.gz
include src/openfecli/tests/commands/test_gather/*.tsv
recursive-include src/openfecli/tests/ *.sdf
recursive-include src/openfecli/tests/ *.pdb
include src/openfe/tests/data/openmm_rfe/vacuum_nocoord.nc
================================================
FILE: README.md
================================================
[](https://openfree.energy/)
[](https://github.com/OpenFreeEnergy/openfe/actions/workflows/ci.yaml)
[](https://codecov.io/gh/OpenFreeEnergy/openfe)
[](https://docs.openfree.energy/en/stable/?badge=stable)
[](https://doi.org/10.5281/zenodo.17258732)
# `openfe` - A Python package for executing alchemical free energy calculations.
The `openfe` package is the flagship project of [Open Free Energy](https://openfree.energy),
a pre-competitive consortium aiming to provide robust, permissively licensed open source tools for molecular simulation in the drug discovery field.
Using `openfe` you can easily plan and execute alchemical free energy calculations.
See our [website](https://openfree.energy/) for more information on the project,
[try it for yourself](https://try.openfree.energy) from the comfort of your browser,
and read the [documentation on using the package](https://docs.openfree.energy/en/latest/index.html).
## License
This library is made available under the [MIT](https://opensource.org/licenses/MIT) open source license.
## Install
### Latest release
The latest release of `openfe` can be installed via `mamba`, `docker`, or a `single file installer`. See [our installation instructions](https://docs.openfree.energy/en/stable/installation.html) for more details.
Dependencies can be installed via conda through:
### Development version
The development version of `openfe` can be installed directly from the `main` branch of this repository.
First install the package dependencies using `mamba`:
```bash
mamba env create -f environment.yml
```
The openfe library can then be installed via:
```
python -m pip install --no-deps .
```
## Authors
The OpenFE development team.
## Acknowledgements
OpenFE is an [Open Molecular Software Foundation](https://omsf.io/) hosted project.
================================================
FILE: codecov.yml
================================================
coverage:
status:
project: off
================================================
FILE: devtools/data/fix_rbfe_results.py
================================================
"""A script to fix up rbfe_results.tar.gz
Useful if Settings are ever changed in a backwards-incompatible way
Will expect "rbfe_results.tar.gz" in this directory, will overwrite this file
"""
import glob
import json
import os.path
import tarfile
from gufe.tokenization import JSON_HANDLER
from openfe.protocols import openmm_rfe
def untar(fn):
"""extract tarfile called *fn*"""
with tarfile.open(fn) as f:
f.extractall()
def retar(loc, name):
    """Pack the directory *loc* into a gzip-compressed tarball called *name*.

    Inside the archive the directory is stored under its final path component
    only, so any leading directories of *loc* are not recorded.
    """
    member_name = os.path.basename(loc)
    with tarfile.open(name, mode="w:gz") as archive:
        archive.add(loc, arcname=member_name)
def replace_settings(fn, new_settings):
    """Overwrite every stored settings entry in the JSON file *fn*.

    Parameters
    ----------
    fn : str
        Path of the result JSON; it is read, modified and rewritten in place.
    new_settings
        Object stored under each ``["inputs"]["settings"]`` entry. It is
        serialized with gufe's JSON encoder.
    """
    with open(fn, "r") as src:
        payload = json.load(src)

    # only the first entry of each list under protocol_result/data is patched
    for entries in payload["protocol_result"]["data"].values():
        entries[0]["inputs"]["settings"] = new_settings

    for unit_result in payload["unit_results"].values():
        unit_result["inputs"]["settings"] = new_settings

    with open(fn, "w") as dst:
        json.dump(payload, dst, cls=JSON_HANDLER.encoder)
def fix_rbfe_results():
    """Rewrite ``rbfe_results.tar.gz`` with default protocol settings.

    The archive is unpacked into the current directory, every result JSON
    under ``./results`` has its settings replaced by the defaults of
    ``RelativeHybridTopologyProtocol``, and the ``results`` directory is then
    packed again over the original archive.
    """
    archive_name = "rbfe_results.tar.gz"
    untar(archive_name)

    # default settings are valid by construction
    default_settings = openmm_rfe.RelativeHybridTopologyProtocol.default_settings()

    for result_json in glob.glob("./results/*json"):
        replace_settings(result_json, default_settings)

    retar("results", archive_name)


if __name__ == "__main__":
    fix_rbfe_results()
================================================
FILE: devtools/data/gen_serialized_results.py
================================================
"""
Dev script to generate some result jsons that are used for testing
Generates
- ABFEProtocol_json_results.gz
- used in abfe_results_json fixture
- SepTopProtocol_json_results.gz
- used in septop_json fixture
- AHFEProtocol_json_results.gz
- used in afe_solvation_json fixture
- RHFEProtocol_json_results.gz
- used in rfe_transformation_json fixture
- MDProtocol_json_results.gz
- used in md_json fixture
"""
import gzip
import json
import logging
import pathlib
import sys
import tempfile
import gufe
from gufe.tokenization import JSON_HANDLER
from kartograf import KartografAtomMapper
from kartograf.atom_aligner import align_mol_shape
from openff.toolkit import AmberToolsToolkitWrapper, Molecule, RDKitToolkitWrapper
from openff.toolkit.utils.toolkit_registry import ToolkitRegistry, toolkit_registry_manager
from openff.units import unit
from rdkit import Chem
import openfe
from openfe.protocols.openmm_afe import (
AbsoluteBindingProtocol,
AbsoluteSolvationProtocol,
)
from openfe.protocols.openmm_md.plain_md_methods import PlainMDProtocol
from openfe.protocols.openmm_rfe import RelativeHybridTopologyProtocol
from openfe.protocols.openmm_septop import SepTopProtocol
from openfecli.utils import configure_logger
# avoid problems with output not showing if queueing system kills a job
sys.stdout.reconfigure(line_buffering=True)

# One stdout handler is shared by every logger configured below.
# NOTE(review): configure_logger comes from openfecli.utils; presumably it
# attaches the handler and sets the level on the named logger - confirm there.
stdout_handler = logging.StreamHandler(sys.stdout)
configure_logger("gufekey", handler=stdout_handler)
configure_logger("gufe", handler=stdout_handler)
configure_logger("openfe", handler=stdout_handler)
# The two openmmtools multistate loggers are requested at DEBUG level.
configure_logger("openmmtools.multistate.multistatereporter", level=logging.DEBUG, handler=stdout_handler) # fmt: skip
configure_logger("openmmtools.multistate.multistatesampler", level=logging.DEBUG, handler=stdout_handler) # fmt: skip
logger = logging.getLogger(__name__)

# SMILES (explicit hydrogens) of the two small test ligands:
# LIGA is pentan-3-one, LIGB is pentan-2-one.
LIGA = "[H]C([H])([H])C([H])([H])C(=O)C([H])([H])C([H])([H])[H]"
LIGB = "[H]C([H])([H])C(=O)C([H])([H])C([H])([H])C([H])([H])[H]"

# Toolkit registry limited to RDKit and AmberTools; used when building the ligands.
amber_rdkit = ToolkitRegistry([RDKitToolkitWrapper(), AmberToolsToolkitWrapper()])
def get_molecule(smi, name):
    """Build a charged ``SmallMoleculeComponent`` from a SMILES string.

    Parameters
    ----------
    smi : str
        SMILES of the molecule.
    name : str
        Name given to the returned component.

    Returns
    -------
    openfe.SmallMoleculeComponent
        Component with generated conformers and AM1-BCC partial charges.
    """
    # conformers and charges are generated with the RDKit + AmberTools registry
    with toolkit_registry_manager(amber_rdkit):
        offmol = Molecule.from_smiles(smi)
        offmol.generate_conformers()
        offmol.assign_partial_charges(partial_charge_method="am1bcc")

    component = openfe.SmallMoleculeComponent.from_openff(offmol, name=name)
    return component
def get_hif2a_inputs():
    """Load the HIF2A protein and ligands from the gzipped files in ``inputs/``.

    Returns
    -------
    tuple
        ``(ligands, protein)`` where ``ligands`` is a list of
        ``SmallMoleculeComponent`` (one per SDF record, hydrogens kept) and
        ``protein`` is the ``ProteinComponent`` named ``"hif2a_prot"``.
    """
    with gzip.open("inputs/hif2a_protein.pdb.gz", "r") as pdb_stream:
        protein = openfe.ProteinComponent.from_pdb_file(pdb_stream, name="hif2a_prot")

    ligands = []
    with gzip.open("inputs/hif2a_ligands.sdf.gz", "r") as sdf_stream:
        # the supplier must be consumed while the stream is still open
        for rdmol in Chem.ForwardSDMolSupplier(sdf_stream, removeHs=False):
            ligands.append(openfe.SmallMoleculeComponent(rdmol))

    return ligands, protein
def execute_and_serialize(
    dag,
    protocol,
    simname,
    new_serialization: bool = False,
):
    """
    Run a DAG, gather its result and write it to the current directory.

    Parameters
    ----------
    dag : gufe.ProtocolDAG
        The DAG to execute & serialize.
    protocol : gufe.Protocol
        The Protocol to which the DAG belongs; its ``gather`` method turns
        the DAG result into the protocol result.
    simname : str
        The name of the simulation, used as prefix of the output file name.
    new_serialization : bool
        If True, write ``{simname}_json_results.json`` using the protocol
        result's ``to_json``. If False (the default), write a gzipped JSON
        file ``{simname}_json_results.gz`` holding the estimate, the
        uncertainty, the protocol result dict and the keyed unit results.
    """
    logger.info(f"running {simname}")

    # shared and scratch data both live in one throwaway directory
    with tempfile.TemporaryDirectory() as tmpdir:
        basedir = pathlib.Path(tmpdir)
        dag_result = gufe.protocols.execute_DAG(
            dag,
            shared_basedir=basedir,
            scratch_basedir=basedir,
            keep_shared=True,
            raise_error=True,
            n_retries=2,
        )

    protocol_result = protocol.gather([dag_result])

    if new_serialization:
        protocol_result.to_json(f"{simname}_json_results.json")
        return

    payload = {
        "estimate": protocol_result.get_estimate(),
        "uncertainty": protocol_result.get_uncertainty(),
        "protocol_result": protocol_result.to_dict(),
    }
    # unit results are stored per unit key
    keyed_units = {}
    for unit_result in dag_result.protocol_unit_results:
        keyed_units[unit_result.key] = unit_result.to_keyed_dict()
    payload["unit_results"] = keyed_units

    with gzip.open(f"{simname}_json_results.gz", "wt") as gz_out:
        json.dump(payload, gz_out, cls=JSON_HANDLER.encoder)
def generate_md_settings():
    """Return ``PlainMDProtocol`` default settings shortened for test data.

    The NVT equilibration, equilibration and production lengths are all set
    to 0.01 ns and the nonbonded method is set to ``"nocutoff"``.
    """
    settings = PlainMDProtocol.default_settings()

    simulation = settings.simulation_settings
    for length_field in (
        "equilibration_length_nvt",
        "equilibration_length",
        "production_length",
    ):
        setattr(simulation, length_field, 0.01 * unit.nanosecond)

    settings.forcefield_settings.nonbonded_method = "nocutoff"
    return settings
def generate_md_json(smc):
    """Run a plain MD simulation of the ligand *smc* and serialize it as ``MDProtocol``.

    The same ligand-only chemical system is used for both end states and no
    mapping is passed.
    """
    md_protocol = PlainMDProtocol(settings=generate_md_settings())
    ligand_system = openfe.ChemicalSystem({"ligand": smc})
    md_dag = md_protocol.create(
        stateA=ligand_system,
        stateB=ligand_system,
        mapping=None,
    )
    execute_and_serialize(md_dag, md_protocol, "MDProtocol")
def generate_abfe_settings():
    """Return ``AbsoluteBindingProtocol`` settings used to generate test results.

    Starts from the protocol defaults and overrides simulation lengths,
    solvation box shape and padding, the nonbonded cutoff, the number of
    protocol repeats (3) and the compute platform (``"CUDA"``).
    """
    settings = AbsoluteBindingProtocol.default_settings()

    # sub-settings name -> {field: value}; applied in the order written
    overrides = {
        "solvent_equil_simulation_settings": {
            "equilibration_length_nvt": 10 * unit.picosecond,
            "equilibration_length": 10 * unit.picosecond,
            "production_length": 10 * unit.picosecond,
        },
        "solvent_simulation_settings": {
            "equilibration_length": 100 * unit.picosecond,
            "production_length": 500 * unit.picosecond,
            "time_per_iteration": 2.5 * unit.ps,
        },
        "complex_equil_simulation_settings": {
            "equilibration_length_nvt": 10 * unit.picosecond,
            "equilibration_length": 10 * unit.picosecond,
            "production_length": 100 * unit.picosecond,
        },
        "complex_simulation_settings": {
            "equilibration_length": 100 * unit.picosecond,
            "production_length": 500 * unit.picosecond,
            "time_per_iteration": 2.5 * unit.ps,
        },
        "solvent_solvation_settings": {
            "box_shape": "dodecahedron",
            "solvent_padding": 1.5 * unit.nanometer,
        },
        "complex_solvation_settings": {
            "box_shape": "dodecahedron",
            "solvent_padding": 1.0 * unit.nanometer,
        },
        "forcefield_settings": {
            "nonbonded_cutoff": 0.8 * unit.nanometer,
        },
        "engine_settings": {
            "compute_platform": "CUDA",
        },
    }
    for section_name, fields in overrides.items():
        section = getattr(settings, section_name)
        for field_name, value in fields.items():
            setattr(section, field_name, value)

    settings.protocol_repeats = 3
    return settings
def generate_abfe_json():
    """Run an absolute binding calculation on HIF2A and serialize it as ``ABFEProtocol``.

    State A holds the first HIF2A ligand, the protein and solvent; state B
    holds only the protein and solvent. The result is written with the
    "new" ``to_json`` serialization.
    """
    hif2a_ligands, hif2a_protein = get_hif2a_inputs()
    abfe_protocol = AbsoluteBindingProtocol(settings=generate_abfe_settings())

    bound_state = openfe.ChemicalSystem(
        {
            "ligand": hif2a_ligands[0],
            "protein": hif2a_protein,
            "solvent": openfe.SolventComponent(),
        }
    )
    unbound_state = openfe.ChemicalSystem(
        {
            "protein": hif2a_protein,
            "solvent": openfe.SolventComponent(),
        }
    )

    abfe_dag = abfe_protocol.create(
        stateA=bound_state,
        stateB=unbound_state,
        mapping=None,
    )
    execute_and_serialize(
        abfe_dag,
        abfe_protocol,
        "ABFEProtocol",
        new_serialization=True,
    )
def generate_ahfe_settings():
    """Return ``AbsoluteSolvationProtocol`` settings used to generate test results.

    Starts from the protocol defaults and overrides simulation lengths, the
    14-window electrostatic and vdW lambda schedules, the replica count (14),
    the early termination target error, the number of protocol repeats (3)
    and the compute platforms (``"CPU"`` for vacuum, ``"CUDA"`` for solvent).
    """
    settings = AbsoluteSolvationProtocol.default_settings()

    solvent_equil = settings.solvent_equil_simulation_settings
    solvent_sim = settings.solvent_simulation_settings
    vacuum_equil = settings.vacuum_equil_simulation_settings
    vacuum_sim = settings.vacuum_simulation_settings
    lambdas = settings.lambda_settings

    solvent_equil.equilibration_length_nvt = 10 * unit.picosecond
    solvent_equil.equilibration_length = 10 * unit.picosecond
    solvent_equil.production_length = 10 * unit.picosecond

    solvent_sim.equilibration_length = 10 * unit.picosecond
    solvent_sim.production_length = 500 * unit.picosecond

    vacuum_equil.equilibration_length = 10 * unit.picosecond
    vacuum_equil.production_length = 10 * unit.picosecond

    vacuum_sim.equilibration_length = 10 * unit.picosecond
    vacuum_sim.production_length = 1000 * unit.picosecond

    # 14 windows: electrostatics are scaled over the first five, vdW afterwards
    lambdas.lambda_elec = [0.0, 0.25, 0.5, 0.75] + [1.0] * 10
    lambdas.lambda_vdw = [0.0] * 5 + [0.12, 0.24, 0.36, 0.48, 0.6, 0.7, 0.77, 0.85, 1.0]

    settings.protocol_repeats = 3

    # one replica per lambda window
    solvent_sim.n_replicas = 14
    vacuum_sim.n_replicas = 14

    solvent_sim.early_termination_target_error = 0.12 * unit.kilocalorie_per_mole
    vacuum_sim.early_termination_target_error = 0.12 * unit.kilocalorie_per_mole

    settings.vacuum_engine_settings.compute_platform = "CPU"
    settings.solvent_engine_settings.compute_platform = "CUDA"
    return settings
def generate_ahfe_json(smc):
    """Run an absolute solvation calculation for *smc* and serialize it as ``AHFEProtocol``.

    State A holds the ligand and solvent; state B holds only solvent.
    """
    ahfe_protocol = AbsoluteSolvationProtocol(settings=generate_ahfe_settings())

    solvated_ligand = openfe.ChemicalSystem(
        {"ligand": smc, "solvent": openfe.SolventComponent()}
    )
    pure_solvent = openfe.ChemicalSystem({"solvent": openfe.SolventComponent()})

    ahfe_dag = ahfe_protocol.create(
        stateA=solvated_ligand,
        stateB=pure_solvent,
        mapping=None,
    )
    execute_and_serialize(ahfe_dag, ahfe_protocol, "AHFEProtocol")
def generate_rfe_settings():
    """Return ``RelativeHybridTopologyProtocol`` settings used to generate test results.

    Starts from the protocol defaults, sets a 10 ps equilibration and a
    250 ps production length, and sets the nonbonded method to ``"nocutoff"``.
    """
    settings = RelativeHybridTopologyProtocol.default_settings()

    simulation = settings.simulation_settings
    simulation.equilibration_length = 10 * unit.picosecond
    simulation.production_length = 250 * unit.picosecond

    forcefield = settings.forcefield_settings
    forcefield.nonbonded_method = "nocutoff"

    return settings
def generate_rfe_json(smcA, smcB):
    """Run a relative hybrid-topology calculation and serialize it as ``RHFEProtocol``.

    *smcB* is first shape-aligned onto *smcA*; the first mapping suggested by
    a ``KartografAtomMapper`` (with hydrogens mapped) is then used to
    transform the ligand-only system of *smcA* into that of the aligned *smcB*.
    """
    rfe_protocol = RelativeHybridTopologyProtocol(settings=generate_rfe_settings())

    aligned_b = align_mol_shape(smcB, ref_mol=smcA)

    atom_mapper = KartografAtomMapper(atom_map_hydrogens=True)
    first_mapping = next(atom_mapper.suggest_mappings(smcA, aligned_b))

    state_a = openfe.ChemicalSystem({"ligand": smcA})
    state_b = openfe.ChemicalSystem({"ligand": aligned_b})

    rfe_dag = rfe_protocol.create(
        stateA=state_a,
        stateB=state_b,
        mapping=first_mapping,
    )
    execute_and_serialize(rfe_dag, rfe_protocol, "RHFEProtocol")
def generate_septop_settings():
    """Return ``SepTopProtocol`` settings used to generate test results.

    Starts from the protocol defaults and overrides simulation lengths,
    solvation box shape and padding, the nonbonded cutoff, the number of
    protocol repeats (1) and the compute platform (``"CUDA"``).
    """
    settings = SepTopProtocol.default_settings()

    # sub-settings name -> {field: value}; applied in the order written
    overrides = {
        "solvent_equil_simulation_settings": {
            "equilibration_length_nvt": 10 * unit.picosecond,
            "equilibration_length": 10 * unit.picosecond,
            "production_length": 10 * unit.picosecond,
        },
        "solvent_simulation_settings": {
            "equilibration_length": 10 * unit.picosecond,
            "production_length": 50 * unit.picosecond,
            "time_per_iteration": 2.5 * unit.ps,
        },
        "complex_equil_simulation_settings": {
            "equilibration_length_nvt": 10 * unit.picosecond,
            "equilibration_length": 10 * unit.picosecond,
            "production_length": 10 * unit.picosecond,
        },
        "complex_simulation_settings": {
            "equilibration_length": 10 * unit.picosecond,
            "production_length": 50 * unit.picosecond,
            "time_per_iteration": 2.5 * unit.ps,
        },
        "solvent_solvation_settings": {
            "box_shape": "dodecahedron",
            "solvent_padding": 1.2 * unit.nanometer,
        },
        "complex_solvation_settings": {
            "box_shape": "dodecahedron",
            "solvent_padding": 1.0 * unit.nanometer,
        },
        "forcefield_settings": {
            "nonbonded_cutoff": 0.9 * unit.nanometer,
        },
        "engine_settings": {
            "compute_platform": "CUDA",
        },
    }
    for section_name, fields in overrides.items():
        section = getattr(settings, section_name)
        for field_name, value in fields.items():
            setattr(section, field_name, value)

    settings.protocol_repeats = 1
    return settings
def generate_septop_json():
    """Run a SepTop calculation on HIF2A and serialize it as ``SepTopProtocol``.

    State A holds the first HIF2A ligand and state B the second one, each
    together with the protein and solvent. No mapping is passed.
    """
    ligands, protein = get_hif2a_inputs()
    septop_protocol = SepTopProtocol(settings=generate_septop_settings())

    state_a = openfe.ChemicalSystem(
        {
            "ligand_A": ligands[0],
            "protein": protein,
            "solvent": openfe.SolventComponent(),
        }
    )
    state_b = openfe.ChemicalSystem(
        {
            "ligand_B": ligands[1],
            "protein": protein,
            "solvent": openfe.SolventComponent(),
        }
    )

    septop_dag = septop_protocol.create(
        stateA=state_a,
        stateB=state_b,
        mapping=None,
    )
    execute_and_serialize(septop_dag, septop_protocol, "SepTopProtocol")
if __name__ == "__main__":
    # Build the two small ligands once; they are shared by the MD, AHFE and
    # RFE runs. The ABFE and SepTop runs load their own HIF2A inputs.
    ligand_a, ligand_b = (
        get_molecule(smiles, label)
        for smiles, label in ((LIGA, "ligandA"), (LIGB, "ligandB"))
    )

    generate_md_json(ligand_a)
    generate_abfe_json()
    generate_ahfe_json(ligand_a)
    generate_rfe_json(ligand_a, ligand_b)
    generate_septop_json()
================================================
FILE: devtools/debug_openmm.sh
================================================
#!/usr/bin/env bash
# Print diagnostic information about the active environment and its OpenMM
# install. Every probe has a "|| echo" fallback so that one missing tool does
# not stop the remaining checks from running under "set -e".
echo "Run this script with your conda env activated"
echo "Invoke the script like this: "
echo "./debug_openmm.sh | tee -a debug.log"
echo "Then send us debug.log"
set -euo pipefail
date
# Without a fallback a missing python would abort the script here and the
# package-manager and GPU information below would never be collected.
which -a python || echo "no python found on PATH"
# Package manager information; only some of these are expected to exist.
conda info -a || echo "no conda"
mamba info -a || echo "no mamba"
micromamba info || echo "no micromamba"
nvidia-smi || echo "no nvidia-smi, are you on a gpu node?"
echo "test openmm"
python -m openmm.testInstallation || echo "testing openmm"
echo "checking plugin load failures"
python -c "import openmm; print(openmm.Platform.getPluginLoadFailures())" || echo "plugin load failures"
echo "checking which platforms support mixed precision"
python -c "import openmmtools; [print(_.getName()) for _ in openmmtools.utils.get_available_platforms(minimum_precision='mixed')]" || echo "openmm errors"
# Installed package lists, again from whichever manager is available.
conda list || echo "no conda"
mamba list || echo "no mamba"
micromamba list || echo "no micromamba"
================================================
FILE: devtools/installer/construct.yaml
================================================
# Installer definition for the OpenFEforge single-file installer.
name: OpenFEforge
# The version is read from the VERSION environment variable at build time.
version: {{ environ["VERSION"] }}
company: OpenFE
license_file: ../../LICENSE
# conda-forge is the only channel listed.
channels:
- conda-forge
write_condarc: True
keep_pkgs: True
transmute_file_type: .conda
# Packages included in the installer.
specs:
- conda
- jupyterlab
- mamba
- notebook <7
# openfe is pinned to the same VERSION the installer is built for
- openfe=={{ environ["VERSION"] }}
- pip
- pytest
- pytest-xdist
# python needs to match https://github.com/googlecolab/backend-info/blob/main/os-info.txt
# until colab pushes a fix
- python 3.12.11
# Not building an .exe for windows or a .pkg for macOS
installer_type: sh
================================================
FILE: docs/CHANGELOG.rst
================================================
=========
Changelog
=========
.. current developments
v1.11.1
====================
**Fixed:**
* Fixed slow response time of CLI commands (`PR #1972 `_).
v1.11.0
====================
* **openfe v1.11.0** introduces support for protein-membrane systems both with the Python API and the CLI. See our tutorial `RBFE calculations of a Protein-Membrane System `_ for details.
The `ability to resume execution of incomplete transformations `_ that was introduced in ``openfe v1.10.0`` is now available for the plain MD and SepTop protocols.
See below for the full changelog for this release:
**Added:**
* Added support for systems with membranes to the following protocols:
PlainMDProtocol, RelativeHybridTopologyProtocol, SepTopProtocol, and AbsoluteBindingProtocol (`PR #1561 `_).
* Added support for membrane systems to ``openfe plan-rbfe-network``.
Use ``--protein-membrane`` instead of the ``--protein`` argument, and see the tutorial on preparing membrane systems (`PR #1896 `_).
* Added API support for resuming the PlainMDProtocol (`PR #1884 `_).
* Added API support for resuming the SepTopProtocol. (`PR #1949 `_).
* The ``validate`` method for the SepTopProtocol has been implemented.
This means that settings and system validation can mostly be done prior to Protocol execution by calling ``SepTopProtocol.validate(stateA, stateB, mapping=None)`` (`PR #1946 <https://github.com/OpenFreeEnergy/openfe/pull/1946>`_).
**Changed:**
* The SepTopProtocol now has a dedicated Analysis unit.
At the top level API, this does not change behavior, but if you are directly interfacing with the ProtocolUnits, you will have to account for this change.
The SepTopProtocolResult now solely uses the Analysis units (`PR #1937 `_).
* Updated the chemical systems user guide and the defining protocols user guide to reflect recent protocol updates, including adding membrane support (`PR #1933 `_).
* The default value for the Hybrid TopologyProtocol setting ``turn_off_core_unique_exceptions`` has been changed to ``True``.
This means 1-4 interactions involving the unique alchemical atoms and core regions will now be interpolated on/off accordingly by default (`PR #1856 `_).
**Deprecated:**
* Perses atom mapper and scorer functionality is deprecated, now slated to be removed in ``openfe v1.12``.
This includes ``PersesAtomMapper`` and ``default_perses_scorer`` (`PR #1857 `_).
**Fixed:**
* Fix erroneous logging information message which would mention setting up the alchemical system when running simulation or analysis units with the hybrid topology, AHFE or ABFE Protocols (`PR #1915 `_).
* System equality checks on resuming no longer expect complete equality in the force parameters.
This fixes a scenario where small changes in precision due to running on different machines would prevent users from restarting their simulations (`PR #1914 `_).
v1.10.0
====================
This release introduces the ability to resume execution of an incomplete transformation using ``openfe quickrun`` with the ``--resume`` flag.
See the `quickrun documentation <https://docs.openfree.energy/en/latest/guide/execution/quickrun_execution.html>`_ for details.
**Added:**
* Added ``--resume`` flag to ``openfe quickrun``.
Quickrun now temporarily caches ``protocolDAG`` information and, when used with the ``--resume`` flag, quickrun will attempt to resume execution of an incomplete transformation (`PR #1848 `_).
* Added API support to resume ``RelativeHybridTopologyProtocol`` simulations (`PR #1774 `_).
* Added API support to resume ``AbsoluteBindingProtocol`` and ``AbsoluteSolvationProtocol`` simulations (`PR #1808 `_).
**Deprecated:**
* Perses atom mapper and scorer functionality is deprecated, slated to be removed in ``openfe v2.0``.
This includes ``PersesAtomMapper`` and ``default_perses_scorer`` (`PR #1857 `_).
**Fixed:**
* Fixed bug introduced in v1.9.0 to ``openfe gather-abfe --report=raw`` where additional unit results for Setup and Simulation units would be shown.
This fix restores the behavior prior to v1.9.0 (`PR #1876 `_).
v1.9.1
====================
**Fixed:**
* Fixed a bug in Protocol termination for the HybridTop and AFE Protocols which would unnecessarily declare an ``UnboundLocalError``.
* Updated ``openfe_analysis`` dependency to fix issue with RMSD analysis (`Issue #1834 `_).
v1.9.0
====================
**Added:**
* The ``validate`` method for the RelativeHybridTopologyProtocol has been implemented.
This means that settings and system validation can mostly be done prior to Protocol execution by calling ``RelativeHybridTopologyProtocol.validate(stateA, stateB, mapping)`` (`PR #1740 `_).
* Added ``openfe test --download-only`` flag, which downloads all test data stored remotely to the local cache (`PR #1814 `_).
**Changed:**
* The absolute free energy protocols (AbsoluteBindingProtocol and AbsoluteSolvationProtocol) have been broken into multiple
protocol units, allowing for setup, run, and analysis to happen
separately in the future when relevant changes to protocol execution are
made (`PR #1776 `_).
* The relative free energy protocol (RelativeHybridTopologyProtocol) has been
broken into multiple protocol units, allowing for the setup, run, and analysis to happen
separately (`PR #1773 <https://github.com/OpenFreeEnergy/openfe/pull/1773>`_).
**Fixed:**
* Fixed bug in ligand network visualization (such as with ``openfe view-ligand-network``) so that ligand names are no longer cut off by the plot border (`PR #1822 `_).
* Endstates in the RelativeHybridTopologyProtocol are now being created
in a manner that allows for isomorphic molecules that differ between
endstates to have different parameters (`PR #1772 `_).
v1.8.1
====================
**Added:**
* Added a progress bar for ``openfe gather`` JSON loading (`PR #1786 `_).
**Fixed:**
* Due to issues with OpenFF's handling of toolkit registries
with NAGL, the use of NAGL models (e.g. AshGC) when OpenEye
is installed but not requested as the charge backend has been
disabled (Issue #1760, `PR #1762 `_).
* Fixed bug in ligand network visualization (such as with ``openfe view-ligand-network``) so that ligand names are no longer cut off by the plot border (`PR #1822 `_).
v1.8.0
====================
**Added:**
* The ``HybridTopologyFactory`` supports building hybrid OpenMM systems which contain ``CMAPTorsionForces`` on non-alchemical atoms.
This should allow for simulations using Amber ff19SB (`PR #1695 `_).
* Added experimental features ``openfe gather-septop`` and ``openfe gather-abfe``, which are analogous to ``openfe gather`` and allow for gathering results generated by the Separated Topologies and Absolute Binding Free Energy protocols, respectively. These commands are experimental and are liable to be changed in a future release.
* Emit a clarifying log message when a user gets a warning from JAX (`PR #1585 `_, fixes `Issue #1499 `_).
* Disable JAX acceleration by default, see https://docs.openfree.energy/en/latest/guide/troubleshooting.html#pymbar-disable-jax for more information (`PR #1694 `_).
* New options have been added to the ``AlchemicalSettings`` of the ``SepTopProtocol``, ``AbsoluteSolvationProtocol`` and ``AbsoluteBindingProtocol``. Notably, these options allow users to control the softcore parameters as well as the use of long range dispersion corrections (`PR #1742 `_).
**Changed:**
* ``openfe gather`` is now more rigorous in extracting ligand names and run types. These are now determined directly from component attributes, rather than relying on naming conventions. (`PR #1691 `_).
* Updated installation docs to recommend ``miniconda`` with ``conda-lock`` as the preferred installation method (`PR #1692 `_).
v1.7.0
====================
This release brings several long awaited features to OpenFE, including the SepTop and ABFE Protocols, as well as the adoption of more computationally efficient settings in the CLI and across the Python API.
The v1.7.0 release also comes with some API changes and breaks, including:
* "CUDA" is now the default platform in the settings, you will need to change this if you run on a non-NVIDIA-powered platform.
* The default solvation cutoff is now 1.5 nm, to avoid issues with small boxes when dealing with ligands in solvent. When calculating complexes using the MD or HybridTopology Protocols with the API, you will need to reduce this value to ~ 1 nm to avoid excessively large water boxes.
* The API has fully migrated to Pydantic V2 and the ``GufeQuantity`` scheme. This only affects Protocol developers. If needed, please see the `gufe typing documentation `_ for more details.
Note that if you want to use NAGL to assign partial charges, you must use ``python >= 3.11``.
Python 3.10 support is no longer maintained according to `SPEC 0 `_ guidelines.
The openfe lock file and docker and apptainer images use Python 3.12, and so charge assignment with NAGL will work without intervention.
**Added:**
* Addition of an Absolute Binding Free Energy Protocol (`PR #1045 `_).
* Added `a cookbook for using jq to inspect JSON files `_.
* The AbsoluteSolvationProtocol now properly implements the ``validate`` method,
allowing users to verify inputs by calling the method directly (`PR #1572 `_).
* Added a new RBFE protocol based on Separated Topologies (`PR #1057 `_).
**Changed:**
* The default atom mapper used in the CLI has been changed from ``LomapAtomMapper`` to ``KartografAtomMapper`` in line with the recommended defaults from the industry benchmarking paper. Users who wish to continue to use ``LomapAtomMapper`` can do so via the YAML configuration file. See the `documentation `_ for details (`PR #1530 `_).
* An improved error message is now shown when a mapping involving a changing constraint length cannot be fixed (`PR #1529 `_).
* The default platform for OpenMM-based Protocols is now CUDA and will fail by default on a non-Nvidia GPU enabled system (`PR #1576 `_).
* Remove unnecessary limit on residues ids (``resids``) when getting mappings from topology in ``topology_helpers.py`` utility module (`PR #1539 `_).
* The relative hybrid topology protocol no longer runs the FIRE minimizer when ``dry=True`` (`PR #1468 `_).
* Units must be explicitly assigned when defining ``Settings`` parameters, and values will be converted to match the default units for a given field. For example, use ``1.0 * units.bar`` or ``"1 bar"`` for pressure, and ``300 * unit.kelvin`` or ``"300 kelvin"`` for temperature.
* For protocol developers: ``FloatQuantity`` is no longer supported. Instead, use ``GufeQuantity`` and ``specify_quantity_units()`` to make a ``TypeAlias``. See the `gufe typing documentation `_ for more details.
* The default ``time_per_iteration`` setting of the ``MultiStateSimulationSettings`` class has been increased from 1.0 ps to 2.5 ps as part of the fast settings update (`PR #1523 `_).
* The default ``box_shape`` setting of the ``OpenMMSolvationSettings`` class has been changed from ``cubic`` to ``dodecahedron`` to improve simulation efficiency as part of the fast settings update (`PR #1523 `_).
* The default ``solvent_padding`` settings of the ``OpenMMSolvationSettings`` class has been increased from 1.2 nm to 1.5 nm to be compatible with the new ``box_shape`` default as part of the fast settings update (`PR #1523 `_).
* The default ``nonbonded_cutoff`` setting of the ``OpenMMSystemGeneratorFFSettings`` class has been decreased to 0.9 nm from 1.0 nm, in line with current force fields best practices and our newly validated fast settings (`PR #1523 `_).
* When calling the CLI ``openfe plan-rbfe-network``, the ``RelativeHybridTopologyProtocol`` settings now reflect the above "fast" settings updates. This includes:
* Dodecahedron box solvation
* Solvation cutoff of 1.5 nm in solvent-only legs, and 1.0 nm in complex legs
* A replica exchange rate of 2.5 ps
* A 0.9 nm nonbonded cutoff
**Deprecated:**
* Deprecated ``openfe.utils.visualization_3D.view_mapping_3d()``. Use the method ``LigandAtomMapping.view_3d()`` instead (`PR #1592 `_).
* Deprecated ``openfe.utils.ligand_utils.get_alchemical_charge_difference()``, which is replaced by ``LigandAtomMapping.get_alchemical_charge_difference()`` in ``gufe`` (`PR #1479 `_).
**Fixed:**
* Charged molecules are now explicitly disallowed in the
AbsoluteSolvationProtocol (`PR #1572 <https://github.com/OpenFreeEnergy/openfe/pull/1572>`_).
v1.6.1
====================
This release includes minor fixes and updates to tests.
**Added:**
* Added a cookbook for using ``jq`` to inspect JSON files.
**Changed:**
* Remove unnecessary limit on residues ids (``resids``) when getting mappings from topology in ``topology_helpers.py`` utility module.
* The relative hybrid topology protocol no longer runs the FIRE minimizer when ``dry=True``.
**Fixed:**
* Updated tests to expect to find NAGL, now that it is supported.
v1.6.0
====================
This release adds support for OpenMM 8.3.0 and Python 3.13.
**Added:**
* Added support for openmm 8.3.0 (benchmarking results at `Issue #1377 `_).
* Added support for ``python 3.13`` (we no longer guarantee support for ``python 3.10``).
* Adds a new internal API for defining alchemical restraints (`PR #1043 `_).
v1.5.0
====================
This release includes support for openmm 8.2 and numpy v2. Checkpoint interval default frequency has changed, resulting in much smaller file sizes. There are also a few minor changes as a result of migrating to use **konnektor** as the backend for many network generators.
**Added:**
* Added support for openmm 8.2 (`PR #1366 `_)
* Added optional ``n_processes`` (number of parallel processes to use when generating the network) arguments for network planners (`PR #927 `_).
* Added optional ``progress`` (whether to show progress bar) for ``openfe.setup.ligand_network_planning.generate_radial_network`` (default = ``False``, such that there is no default behavior change) (`PR #927 `_).
* Added compatibility for numpy v2 (`PR #1260 `_).
**Changed:**
* The checkpoint interval default frequency has been increased to every
nanosecond. ``real_time_analysis_interval`` no longer needs to be divisible
by the checkpoint interval, allowing users of the ``HybridTopologyProtocol``
and ``AbsoluteSolvationProtocol`` to write checkpoints less frequently and
yielding smaller file sizes.
* `konnektor `_ is now used as the backend for all network generation (`PR #927 `_).
* ``openfe.setup.ligand_network_planning.generate_maximal_network`` now returns the *best* mapping for each edge, rather than *all possible* mappings for each edge. If multiple mappers are passed but no scorer, the first mapper passed will be used, and a warning will be raised (`PR #927 `_).
**Fixed:**
* Absolute free energy calculations (e.g. ``AbsoluteSolvationProtocol``) now
correctly pass the equilibrated box vectors to the alchemical simulation.
In the past default vectors were used, which in some cases led to random
crashes due to an abrupt volume change. We do not believe that this
significantly affected free energy results (`PR #1275 `_).
v1.4.0
====================
This release includes significant quality of life improvements for the CLI's ``openfe gather`` command.
**Added:**
* ``openfe gather`` now accepts any number of filepaths and/or directories containing results JSON files, instead of only accepting one results directory (`PR #1212 `_).
* When running ``openfe gather --report=dg`` and result edges have fewer than 2 replicates, an error will be thrown up-front instead of failing downstream with a ``numpy.linalg.LinAlgError: SVD did not converge`` error (`PR #1243 `_).
* ``openfe gather`` includes failed simulations in its output, with ``Error`` listed in place of a computed value, instead of simply omitting those results from the output table (`PR #1227 `_).
* ``openfe gather --report=dg`` (the default) checks for connectivity of the results network and throws an error if the network is disconnected or has fewer than 3 edges (`PR #1227 `_).
* ``openfe gather`` prints warnings for all results JSONs whose simulations have failed or are otherwise invalid (`PR #1227 `_ ).
* ``openfe gather`` now throws an error up-front if no valid results are provided, instead of returning an empty table (`PR #1245 `_).
**Changed:**
* Improved formatting of ``openfe gather`` output tables. Use ``--tsv`` to instead view the raw tsv formatted output (this was the default behavior as of v1.3.x) (`PR #1246 `_).
* Improved responsiveness of several CLI commands (`PR #1254 `_).
v1.3.1
====================
Bugfix release - Improved error handling and code cleanup.
We are also dropping official support for MacOSX-x86_64.
Any platform-specific bugs will be addressed when possible, but as a low priority.
**Added:**
* ``openfe gather`` now detects failed simulations up-front and prints warnings to stdout (`PR #1207 `_).
**Changed:**
* Temporarily disabled bootstrap uncertainties in forward/reverse analysis due to solver loop issues when dealing with too small a set of samples (`PR #1174 `_).
**Removed:**
* Dropped official support for MacOSX-x86_64. Any platform-specific bugs will be addressed when possible, but as a low priority.
* Unused trajectory handling code was removed from ``openfe.utils``, please use ``openfe-analysis`` instead (`PR #1182 `_).
**Fixed:**
* Fixed `issue #1178 `_ -- The GPU system probe is now more robust to different ways the ``nvidia-smi`` command can fail (`PR #1186 `_)
* Fixed bug where openmm protocols using default settings would re-load from JSON as a different gufe key due to unit name string representation discrepancies (`PR #1210 `_)
v1.3.0
====================
**Added:**
* Added CLI support for ``generate_lomap_network``. This option can be specified as a `YAML-defined setting `_
* Added ``--n-protocol-repeats`` CLI option to allow user-defined number of repeats per quickrun execution. This allows for parallelizing execution of repeats by setting ``--n-protocol-repeats=1`` and calling ``quickrun`` on the same input file multiple times.
* Added a new CLI command (``charge-molecules``) to bulk assign partial charges to molecules `PR#1068 `_
* CLI setup will raise warnings for unsupported top-level YAML fields.
* OpenMMEngineSettings now has a `gpu_device_index` attribute allowing users to pass through a list of ``ints`` to select the GPU devices to run their simulations on.
* Add support for variable position/velocity trajectory writing.
* ``openfe gather`` now supports replicates that have been submitted in parallel across separate directories.
**Changed:**
* Networks planned using the CLI will now automatically use an extended protocol for transformations involving a net charge change `PR#1053 `_
* The ``plan-rhfe-network`` and ``plan-rbfe-network`` CLI commands will now assign partial charges before planning the network if charges are not present, the charge assignment method can be controlled via the yaml settings file `PR#1068 `_
* `openfe.protocols.openmm_rfe._rfe_utils.compute` has been moved to `openfe.protocols.openmm_utils.omm_compute`.
* ``openfe gather`` now includes *all* edges with missing runs (instead of just the first failing edge) when raising a "missing runs" error.
* ``openfe quickrun`` now creates the parent directory as-needed for user-defined output json paths (``-o``).
* The MBAR bootstrap (1000 iterations) error is used to estimate protocol uncertainty instead of the statistical uncertainty (one standard deviation) and pymbar3 is no longer supported `PR#1077 `_
* CLI network planners' default names use prefixes `rbfe_` or `rhfe_` , instead of `easy_rbfe` or `easy_rhfe`, to simplify default transformation names.
**Removed:**
* openfe is no longer tested against macos-12. macos support is, for now, limited to osx-arm64 (macos-14+).
**Fixed:**
* ``openfe quickrun`` now creates the parent directory as-needed for user-defined output json paths (``-o``).
* OpenMM CPU vacuum calculations now enforce the use of a single CPU to avoid large performance losses.
v1.2.0
====================
**Added:**
* New `cookbook featuring bespokefit `_
**Fixed:**
* Improved responsiveness of CLI calls
* Fixed bug where `openfe gather --report raw` was only including first replicates.
v1.1.0
====================
**Added:**
* Extended system solvation tooling, including support for: non-cubic boxes,
explicitly defining the number of waters added, the box vectors, and box size
as supported by `Modeller.addSolvent` in OpenMM 8.0 and above.
**Changed:**
* Improved documentation of the OpenMMSolvationSettings.
* The `PersesAtomMapper` now uses openff.units in line with the rest of the package.
* Structural analysis data is no longer written to `structural_analysis.json`
but rather a 32bit numpy compressed file named `structural_analysis.npz`
(`PR #937 `_).
* Structural analysis array data is no longer directly returned in the
RelativeHybridTopologyProtocol result dictionary. Instead it should
be accessed from the serialized NPZ file `structural_analysis.npz`.
The `structural_analysis` key now contains a path to the NPZ file,
if the structural analysis did not fail (the `structural_analysis_error`
key will instead be present on failure) (`PR #937 `_).
* Add duecredit citations for pymbar when calling
`openfe.protocols.openmm_utils.multistate_analysis`.
**Fixed:**
* 2D RMSD plotting now allows for fewer than 5 states (`PR #896 `_).
* 2D RMSD plotting no longer draws empty axes when
the number of states - 1 is not divisible by 4 (`PR #896 `_).
* The RelativeHybridTopologyProtocol result unit is now much smaller,
due to the removal of structural analysis data (`PR #937 `_).
v1.0.1
====================
**Added:**
* Debug script in devtools to test OpenMM installation.
* Use rever to manage changelog.
**Changed:**
* Updated docs to reference miniforge instead of mambaforge since they are the same now, see https://github.com/conda-forge/miniforge?tab=readme-ov-file#whats-the-difference-between-mambaforge-and-miniforge.
* The LomapAtomMapper defaults have now changed to better reflect real-life usage. Key kwarg changes include: `max3d=1.0` and `shift=False`.
**Fixed:**
* Calling `get_forward_and_reverse_energy_analysis` in the RFE and AFE protocols now results in a warning if any results are ``None`` due to MBAR convergence issues.
* Checkpoint interval default value has been set to 250 ps instead of 1 ps.
This better matches the previous default for openfe versions < 1.0rc
(See `issue #772 `_ ).
================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
# Default sphinx-build options: -v (more verbose logging), -W (turn warnings
# into errors) and --keep-going (with -W, keep building after the first warning
# and fail at the end instead of stopping immediately).
SPHINXOPTS ?= -v -W --keep-going
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# "Makefile" is declared phony here and is the prerequisite of the catch-all
# rule below.
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
================================================
FILE: docs/_ext/sass.py
================================================
"""
sphinxcontrib-sass
https://github.com/attakei-lab/sphinxcontrib-sass
Kazuya Takei
Apache 2.0
Modified to:
- Write directly to Sphinx output directory
- Infer targets if not given
- Ensure ``target: Path`` in ``configure_path()``
- Return version number and thread safety from ``setup()``
- Use compressed style by default
- More complete type checking
"""
from os import PathLike
from pathlib import Path
from typing import Optional, Union
import sass
from sphinx.application import Sphinx
from sphinx.environment import BuildEnvironment
from sphinx.util import logging
logger = logging.getLogger(__name__)
def configure_path(conf_dir: str, src: Optional[Union[PathLike, Path]]) -> Path:
    """Resolve ``src`` into a path anchored at ``conf_dir``.

    Parameters
    ----------
    conf_dir
        Directory that relative paths are resolved against.
    src
        Path to resolve, or ``None`` to use ``conf_dir`` itself.

    Returns
    -------
    Path
        ``src`` (or ``conf_dir`` when ``src`` is ``None``) unchanged if it is
        already absolute; otherwise that path joined onto ``conf_dir``.
    """
    base = Path(conf_dir)
    candidate = base if src is None else Path(src)
    if candidate.is_absolute():
        return candidate
    # Relative paths are interpreted relative to the anchoring directory.
    return base / candidate
def get_targets(app: Sphinx) -> dict[Path, Path]:
    """Map each Sass source file to the CSS file it should be compiled to.

    The source directory is resolved from ``sass_src_dir`` relative to the
    Sphinx configuration directory, and the output directory from
    ``sass_out_dir`` relative to the Sphinx output directory.

    If ``sass_targets`` is a dict it is used as the source-to-destination
    mapping. Otherwise the targets are inferred: every ``.scss``/``.sass``
    file under the source directory whose file name does not start with an
    underscore is mapped to the same relative path with a ``.css`` suffix.

    Returns
    -------
    dict[Path, Path]
        Source path (joined onto the source directory) to destination path
        (joined onto the output directory).
    """
    source_root = configure_path(app.confdir, app.config.sass_src_dir)
    output_root = configure_path(app.outdir, app.config.sass_out_dir)

    mapping = app.config.sass_targets
    if not isinstance(mapping, dict):
        # No explicit mapping configured: discover the stylesheets ourselves.
        mapping = {}
        for stylesheet in source_root.glob("**/[!_]*.s[ca]ss"):
            mapping[stylesheet] = stylesheet.relative_to(source_root).with_suffix(".css")

    resolved = {}
    for source, destination in mapping.items():
        resolved[source_root / source] = output_root / destination
    return resolved
def build_sass_sources(app: Sphinx, env: BuildEnvironment):
    """Compile every Sass target to CSS and write it to its destination.

    Parameters
    ----------
    app
        The Sphinx application; the ``sass_include_paths`` and
        ``sass_output_style`` config values are read from it, and the
        source/destination mapping is obtained from ``get_targets(app)``.
    env
        The build environment. Unused, but part of the signature this
        function is declared with.
    """
    logger.debug("Building stylesheet files")
    include_paths = [str(p) for p in app.config.sass_include_paths]
    targets = get_targets(app)
    output_style = app.config.sass_output_style
    # Build css files
    for src, dst in targets.items():
        # Read and write explicitly as UTF-8 so that non-ASCII characters in
        # a stylesheet do not depend on the platform's locale encoding.
        content = src.read_text(encoding="utf-8")
        css = sass.compile(
            string=content,
            output_style=output_style,
            # The stylesheet's own directory comes first so that its relative
            # imports resolve before the configured include paths.
            include_paths=[str(src.parent)] + include_paths,
        )
        dst.parent.mkdir(exist_ok=True, parents=True)
        dst.write_text(css, encoding="utf-8")
def setup(app: Sphinx):
    """
    Register this extension with Sphinx.

    Declares the ``sass_*`` configuration values (all with the ``"html"``
    rebuild condition), connects ``build_sass_sources`` to the
    ``"env-updated"`` event, and returns the extension metadata dict.
    """
    logger.debug(f"Using {__name__}")

    # (config name, default value) pairs, registered in this order.
    config_defaults = (
        ("sass_include_paths", []),
        ("sass_src_dir", None),
        ("sass_out_dir", None),
        ("sass_targets", None),
        ("sass_output_style", "compressed"),
    )
    for option_name, default_value in config_defaults:
        app.add_config_value(option_name, default_value, "html")

    app.connect("env-updated", build_sass_sources)

    extension_metadata = {
        "version": "0.3.4ofe",
        "parallel_read_safe": True,
        "parallel_write_safe": True,
    }
    return extension_metadata
================================================
FILE: docs/_sass/deflist-flowchart.scss
================================================
// Custom properties shared by the flowchart rules in this stylesheet.
// The --pst-color-* variables are not defined in this file; presumably they
// are provided by the documentation theme (TODO confirm).
:root {
  // Arrow geometry and colour.
  --arrow-thickness: 4px;
  --arrow-head-size: 7px;
  --arrow-length: 2em;
  // Horizontal distance between the two arrows of a multiple/cycle arrow.
  --arrow-multiple-gap: 20px;
  --arrow-color: var(--pst-color-text-muted);
  // Length of the transparent fade at the top and bottom of an arrow label.
  --arrow-fade-dist: 0px;
  // Flowchart colours.
  --flowchart-def-bg-color: var(--pst-color-surface);
  --flowchart-bg-color: var(--pst-color-background);
  --flowchart-def-border-color: var(--pst-color-border);
  // Base width unit used by the .width-N classes.
  --flowchart-unit-width: 45px;
  --flowchart-spacing: 0.5rem;
  --flowchart-column-gap: calc(1.5 * var(--flowchart-spacing));
  // Width reserved for the vertical top-level label (used as the dt width).
  --flowchart-top-label-space: 26px;
}
// Thicker arrow variant: overrides the :root arrow thickness and head size
// for elements carrying both the "arrow" and "thick" classes.
.arrow.thick {
  --arrow-thickness: 6px;
  --arrow-head-size: 10px;
}
// Lists inside a flowchart (or a list that is itself the flowchart) are laid
// out as a full-height vertical flex column with no list margin or padding.
.deflist-flowchart ul,
ul.deflist-flowchart {
  display: flex;
  flex-direction: column;
  justify-content: space-between;
  height: 100%;
  // NOTE(review): grid-column-gap is the legacy name of column-gap.
  grid-column-gap: var(--flowchart-column-gap);
  margin: 0;
  padding: 0;
}
.deflist-flowchart {
margin: 1em 0;
p:first-child {
margin-top: 0;
}
p:last-child {
margin-bottom: 0;
}
li,
li ul
{
margin: 0;
padding: 0;
}
// Hide list items that have no content and no class attribute.
li:empty:not([class])
{
  display: none;
}
li {
list-style: none;
}
.arrow-down::after,
.arrow-up::after,
.arrow-multiple.arrow-down::before,
.arrow-multiple.arrow-up::before,
.arrow-cycle::after,
.arrow-cycle::before {
content: "";
}
.arrow-down,
.arrow-up,
.arrow-cycle
{
--arrow-head-size-clamped: calc(min(var(--arrow-head-size), var(--arrow-length) / 2));
display: flex;
justify-content: center;
align-items: center;
flex-grow: 1;
min-height: var(--arrow-length);
width: 100%;
margin: calc(2 * var(--flowchart-spacing)) auto;
position: relative;
z-index: 1;
padding: calc(var(--arrow-length) / 4) 0;
&::before, &::after {
--actual-arrow-length: max(var(--arrow-length), 100%);
--arrow-tail-gradient:
linear-gradient(
45deg,
transparent calc(50% - var(--arrow-thickness)/2),
var(--arrow-color) calc(50% - var(--arrow-thickness)/2),
var(--arrow-color) calc(50% + var(--arrow-thickness)/2),
transparent calc(50% + var(--arrow-thickness)/2)
);
--arrow-head-gradient:
linear-gradient(
-45deg,
var(--arrow-color) var(--arrow-head-size-clamped),
transparent var(--arrow-head-size-clamped)
);
height: calc(var(--actual-arrow-length)/1.4142);
width: auto;
aspect-ratio: 1;
padding: 0;
display: inline-block;
transform: rotate(45deg);
background-image:
var(--arrow-tail-gradient),
var(--arrow-head-gradient);
position: absolute;
top: 0;
left: 50%;
transform-origin: 0 0;
z-index: -1;
}
&.arrow-tail {
&::before, &::after {
background-image:
var(--arrow-tail-gradient);
}
}
// Label paragraph placed directly inside an arrow item: it is painted with
// the flowchart background colour, fading to transparent over
// --arrow-fade-dist at the top and bottom.
> p {
  background: linear-gradient(
    transparent,
    var(--flowchart-bg-color) var(--arrow-fade-dist),
    var(--flowchart-bg-color) calc(100% - var(--arrow-fade-dist)),
    transparent
  );
  line-height: 1.5;
  z-index: 10;
}
}
.arrow-down:not(.arrow-tail),
.arrow-cycle {
padding-bottom: calc(var(--arrow-head-size-clamped) + var(--arrow-length) / 4);
}
.arrow-up:not(.arrow-tail),
.arrow-cycle {
padding-top: calc(var(--arrow-head-size-clamped) + var(--arrow-length) / 4);
}
.arrow-cycle, .arrow-multiple {
&::after {
translate: calc(0.5 * var(--arrow-multiple-gap)) 0;
}
&::before {
translate: calc(-0.5 * var(--arrow-multiple-gap)) 0;
}
}
.arrow-up::after,
.arrow-multiple.arrow-up::before,
.arrow-cycle::before
{
transform: rotate(-135deg);
translate: 0 calc(var(--actual-arrow-length) + 2 * var(--flowchart-spacing) + var(--arrow-head-size-clamped) / 2);
}
.arrow-cycle::before {
translate:
calc(-0.5 * var(--arrow-multiple-gap))
140%;
}
.arrow-aside {
margin-left: calc(8 * var(--arrow-head-size-clamped));
&::after {
left: calc(-4 * var(--arrow-head-size-clamped));
}
}
.arrow-multiple-combine {
&::before {
content: "";
width: var(--arrow-multiple-gap);
border: var(--arrow-thickness) solid var(--arrow-color);
height: calc(var(--arrow-length) / 2);
background: var(--flowchart-bg-color);
transform: none;
left: auto;
z-index: 2;
}
&.arrow-down {
padding-top: calc(0.75 * var(--arrow-length) - var(--arrow-head-size-clamped) / 2);
padding-bottom: calc(0.5 * var(--arrow-head-size-clamped) + 0.25 * var(--arrow-length));
&::before {
border-top: 1px solid var(--flowchart-bg-color);
}
}
&.arrow-up {
&::before {
border-bottom: 1px solid var(--flowchart-bg-color);
top: auto;
bottom: -1px;
}
}
}
.arrow-tail {
&.arrow-down {
margin-bottom: 0;
}
&.arrow-up {
margin-top: 0;
}
}
.arrow-head {
&.arrow-up {
margin-bottom: 0;
}
&.arrow-down {
margin-top: 0;
}
}
.arrow-combine, .arrow-combine-left, .arrow-combine-right {
&.arrow-down.arrow-tail, &.arrow-up.arrow-head {
--arrow-combine-gradient-angle: 0deg;
padding-bottom: calc(0.5 * var(--arrow-thickness));
margin-bottom: calc(-0.5 * var(--arrow-thickness));
}
&.arrow-up.arrow-tail, &.arrow-down.arrow-head {
--arrow-combine-gradient-angle: 180deg;
padding-top: calc(0.5 * var(--arrow-thickness));
margin-top: calc(-0.5 * var(--arrow-thickness));
}
background-image:
linear-gradient(
var(--arrow-combine-gradient-angle),
var(--arrow-color) var(--arrow-thickness),
transparent var(--arrow-thickness)
);
background-repeat: no-repeat;
width: calc(max(100% + 2 * var(--flowchart-column-gap), var(--flowchart-unit-width)));
margin-left: calc(-1 * var(--flowchart-column-gap));
&.arrow-combine-left, &.arrow-combine-right {
background-size: 50%;
&.arrow-multiple {
background-size: calc(50% + 0.5 * var(--arrow-multiple-gap));
}
}
&.arrow-combine-right {
background-position-x: 100%;
}
}
> ul > li {
&.arrow-down,
&.arrow-up,
&.arrow-cycle {
width: calc(100% - var(--flowchart-top-label-space));
margin-left: 0;
}
}
dl {
display: flex;
flex-direction: row-reverse;
margin: 0;
padding: 0 var(--flowchart-spacing);
}
dt {
display: inline-block;
writing-mode: vertical-rl;
margin-top: .25rem;
flex-grow: 0;
width: var(--flowchart-top-label-space);
font-size: 1.1em;
}
dd {
text-align: center;
position: relative;
border: 1px solid var(--flowchart-def-border-color);
border-radius: .25rem;
margin: 0;
display: inline-block;
flex-grow: 1;
container-type: inline-size;
container-name: flowchart;
overflow-x: auto;
}
dd dl {
background-color: var(--flowchart-def-bg-color);
border-radius: 4px;
box-shadow: 0 6px 10px 0 rgba(0,0,0,0.14),
0 1px 18px 0 rgba(0,0,0,0.12),
0 3px 5px -1px rgba(0,0,0,0.4);
display: block;
margin: 0 auto;
padding: calc(var(--flowchart-spacing) / 2);
max-width: calc(100cqw - 2 * var(--flowchart-spacing));
min-width: calc(2 * var(--flowchart-unit-width) + var(--flowchart-column-gap));
}
dd dt {
writing-mode: horizontal-tb;
display: block;
margin-top: 0;
width: unset;
font-size: unset;
}
dd dd {
border: none;
display: block;
container-type: unset;
overflow-x: unset;
padding: calc(var(--flowchart-spacing) / 2);
}
dd > ul {
width: fit-content;
padding: var(--flowchart-spacing);
margin: 0 auto;
overflow: hidden;
}
dd dd > ul {
min-width: unset;
padding: 0;
margin: 0;
}
dl a, a {
font-weight: bold;
}
div.flowchart-sidebyside > ul:only-child {
display: flex;
flex-direction: row;
flex-wrap: nowrap;
justify-content: space-between;
}
// Spacer element: fills the available height, shrinks far more readily than
// its siblings (flex-shrink: 9999), and never collapses below twice the
// flowchart spacing.
.flowchart-spacer {
  height: 100%;
  flex-shrink: 9999;
  min-height: calc(2 * var(--flowchart-spacing));
}
.width-1 {
width: calc(var(--flowchart-unit-width));
}
.width-2 {
width: calc(2 * var(--flowchart-unit-width) + var(--flowchart-column-gap));
}
.width-3 {
width: calc(3 * var(--flowchart-unit-width) + 2 * var(--flowchart-column-gap));
}
.width-4 {
width: calc(4 * var(--flowchart-unit-width) + 3 * var(--flowchart-column-gap));
}
.width-5 {
width: calc(5 * var(--flowchart-unit-width) + 4 * var(--flowchart-column-gap));
}
.width-6 {
width: calc(6 * var(--flowchart-unit-width) + 5 * var(--flowchart-column-gap));
}
.width-7 {
width: calc(7 * var(--flowchart-unit-width) + 6 * var(--flowchart-column-gap));
}
.width-8 {
width: calc(8 * var(--flowchart-unit-width) + 7 * var(--flowchart-column-gap));
}
.width-9 {
width: calc(9 * var(--flowchart-unit-width) + 8 * var(--flowchart-column-gap));
}
.width-10 {
width: calc(10 * var(--flowchart-unit-width) + 9 * var(--flowchart-column-gap));
}
li {
&.width-2,
&.width-3,
&.width-4,
&.width-5,
&.width-6,
&.width-7,
&.width-8,
&.width-9,
&.width-10,
&.width-full {
> dl {
max-width: unset;
}
}
}
}
================================================
FILE: docs/_templates/autosummary/base.rst
================================================
.. title:: {{ objname }}
.. currentmodule:: {{ module }}
.. auto{{ objtype }}:: {{ objname }}
================================================
FILE: docs/_templates/autosummary/class.rst
================================================
.. title:: {{ objname }}
.. currentmodule:: {{ module }}
.. auto{{ objtype }}:: {{ objname }}
================================================
FILE: docs/conf.py
================================================
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
from importlib.metadata import version
from inspect import cleandoc
from pathlib import Path
import git
import nbformat
import nbsphinx
from packaging.version import parse
sys.path.insert(0, os.path.abspath("../"))
os.environ["SPHINX"] = "True"
# -- Project information -----------------------------------------------------
project = "OpenFE"
copyright = "2022, The OpenFE Development Team"
author = "The OpenFE Development Team"
# don't include patch version (https://github.com/OpenFreeEnergy/openfe/issues/1261)
version = f"{parse(version('openfe')).major}.{parse(version('openfe')).minor}"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.napoleon",
"sphinx_click.ext",
"sphinxcontrib.autodoc_pydantic",
"sphinx_toolbox.collapse",
"sphinx.ext.autosectionlabel",
"sphinx_design",
"sphinx.ext.intersphinx",
"sphinx.ext.autosummary",
"docs._ext.sass",
"myst_parser",
"nbsphinx",
"nbsphinx_link",
"sphinx.ext.mathjax",
]
suppress_warnings = ["config.cache"] # https://github.com/sphinx-doc/sphinx/issues/12300
intersphinx_mapping = {
"python": ("https://docs.python.org/3.9", None),
"numpy": ("https://numpy.org/doc/stable", None),
"scikit.learn": ("https://scikit-learn.org/stable", None),
"openmm": ("https://docs.openmm.org/latest/api-python/", None),
"rdkit": ("https://www.rdkit.org/docs", None),
"openeye": ("https://docs.eyesopen.com/toolkits/python/", None),
"mdtraj": ("https://www.mdtraj.org/1.9.5/", None),
"openff.units": ("https://docs.openforcefield.org/projects/units/en/stable", None),
"gufe": ("https://gufe.openfree.energy/en/latest/", None),
}
autoclass_content = "both"
# Make sure labels are unique
# https://www.sphinx-doc.org/en/master/usage/extensions/autosectionlabel.html#confval-autosectionlabel_prefix_document
autosectionlabel_prefix_document = True
autodoc_pydantic_model_show_json = False
autodoc_default_options = {
"members": True,
"member-order": "bysource",
"inherited-members": "GufeTokenizable,BaseModel",
"undoc-members": True,
"special-members": "__call__",
}
toc_object_entries_show_parents = "hide"
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = [
"_build",
"**/Thumbs.db",
"**/.DS_Store",
"_ext",
"_sass",
"**/README.md",
"ExampleNotebooks",
]
# Modules that autodoc should mock instead of importing when building the
# docs. Each module is listed once.
autodoc_mock_imports = [
    "cinnabar",
    "dill",
    "MDAnalysis",
    "matplotlib",
    "mdtraj",
    "openfe_analysis",
    "openmmforcefields",
    "openmmtools",
    "pymbar",
    "openff.interchange",
    "psutil",
    "py3Dmol",
    "zstandard",
]
# Extensions for the myst parser
myst_enable_extensions = [
"dollarmath",
"colon_fence",
"smartquotes",
"replacements",
"deflist",
"attrs_inline",
]
myst_heading_anchors = 3
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "ofe_sphinx_theme"
html_theme_options = {
"logo": {"text": "OpenFE docs"},
"icon_links": [
{
"name": "GitHub",
"url": "https://github.com/OpenFreeEnergy/openfe",
"icon": "fa-brands fa-square-github",
"type": "fontawesome",
}
],
"accent_color": "cantina-purple",
"navigation_with_keys": False,
}
html_logo = "_static/OFE-color-icon.svg"
html_favicon = "_static/OFE-color-icon.svg"
# temporary fix, see https://github.com/pydata/pydata-sphinx-theme/issues/1662
html_sidebars = {
"installation": [],
"CHANGELOG": [],
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# replace macros
rst_prolog = """
.. |rdkit.mol| replace:: :class:`rdkit.Chem.rdchem.Mol`
"""
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
html_css_files = [
"css/custom.css",
"css/custom-api.css",
"css/deflist-flowchart.css",
]
# custom-api.css is compiled from custom-api.scss
sass_src_dir = "_sass"
sass_out_dir = "_static/css"
# Clone or update ExampleNotebooks
# The path is relative, so it is resolved against the current working
# directory of the process running this configuration.
example_notebooks_path = Path("ExampleNotebooks")
try:
    if example_notebooks_path.exists():
        # A checkout already exists: try to update it in place.
        repo = git.Repo(example_notebooks_path)
        try:
            repo.remote("origin").pull()
        except git.exc.GitCommandError:
            # cannot pull if on a tag
            pass
    else:
        # No checkout yet: clone the pinned ref of the notebooks repository.
        repo = git.Repo.clone_from(
            "https://github.com/OpenFreeEnergy/ExampleNotebooks.git",
            branch="2026.04.28",
            to_path=example_notebooks_path,
        )
except Exception as e:
    # Deliberately broad: any failure to fetch the notebooks is reported
    # through the Sphinx logger as a warning instead of being raised here.
    from sphinx.util.logging import getLogger

    # Take the file name and line number from the exception's traceback so
    # the warning says where the failure was caught.
    filename = e.__traceback__.tb_frame.f_code.co_filename
    lineno = e.__traceback__.tb_lineno
    getLogger("sphinx.ext.openfe_git").warning(
        f"Getting ExampleNotebooks failed in {filename} line {lineno}: {e}"
    )
# Jinja/reST template placed at the top of every notebook page.
#
# 1. Work out the notebook's path inside the ExampleNotebooks repository
#    (following ``.nblink`` stubs to their link target and dropping the
#    leading "ExampleNotebooks/" directory).
# 2. Build "View on GitHub" and "Download Notebook" buttons from that path.
# 3. Emit a reST label named after the document path so that other pages can
#    reference the notebook.
#
# The reST part is whitespace-sensitive: the buttons must be indented under
# the ``container`` directive, and each button's options must be separated
# from its content by a blank line.
#
# NOTE(review): gh_branch is "main" while the clone above is pinned to the
# "2026.04.28" ref, so the links may not match the rendered notebooks --
# confirm this is intended.
nbsphinx_prolog = cleandoc(r"""
    {%- set gh_repo = "OpenFreeEnergy/ExampleNotebooks" -%}
    {%- set gh_branch = "main" -%}
    {%- set path = env.doc2path(env.docname, base=None) -%}
    {%- if path.endswith(".nblink") -%}
        {%- set path = env.metadata[env.docname]["nbsphinx-link-target"] -%}
    {%- endif -%}
    {%- if path.startswith("ExampleNotebooks/") -%}
        {%- set path = path.replace("ExampleNotebooks/", "", 1) -%}
    {%- endif -%}
    {%- set gh_url =
        "https://www.github.com/"
        ~ gh_repo
        ~ "/blob/"
        ~ gh_branch
        ~ "/"
        ~ path
    -%}
    {%- set dl_url =
        "https://raw.githubusercontent.com/"
        ~ gh_repo
        ~ "/"
        ~ gh_branch
        ~ "/"
        ~ path
    -%}

    .. container:: ofe-top-of-notebook

        .. button-link:: {{gh_url}}
            :color: primary
            :shadow:
            :outline:

            :octicon:`mark-github` View on GitHub

        .. button-link:: {{dl_url}}
            :color: primary
            :shadow:
            :outline:

            :octicon:`download` Download Notebook

    .. _{{ env.doc2path(env.docname, base=None) }}:
""")
================================================
FILE: docs/cookbook/bespoke_parameters.nblink
================================================
{
"path": "../ExampleNotebooks/cookbook/bespoke_parameters_showcase.ipynb"
}
================================================
FILE: docs/cookbook/choose_protocol.nblink
================================================
{
"path": "../ExampleNotebooks/cookbook/choose_protocol.ipynb"
}
================================================
FILE: docs/cookbook/create_alchemical_network.nblink
================================================
{
"path": "../ExampleNotebooks/cookbook/create_alchemical_network.ipynb"
}
================================================
FILE: docs/cookbook/dumping_transformation.rst
================================================
.. _dumping_transformations:
Dumping a ``Transformation`` to JSON
====================================
If you're trying to run a full campaign of simulations representing an
alchemical network, we generally recommend saving objects using our storage
tools, which avoids saving duplicate information to disk.
.. TODO: add links to storage tools once they're complete
However, there are situations where it is reasonable to serialize a single
:class:`.Transformation`. For example, this can be useful when trying to
compare results run on different machines. This also provides a trivial way
for a user to run edges in parallel, if they don't want to use the more
sophisticated techniques we have developed.
For these cases, we have made it very easy for a user to dump a
transformation to JSON. Simply use the method
:meth:`.Transformation.to_json`. For example:
.. code::
transformation.to_json("mytransformation.json")
When you do dump a single transformation, it can be reloaded into memory
with the :meth:`.Transformation.from_json` method:
.. code::
transformation = Transformation.from_json("mytransformation.json")
Once you've saved it to JSON, you can also run this transformation with the
``openfe`` command line tool's :ref:`cli_quickrun`, e.g.:
.. code:: bash
$ openfe quickrun mytransformation.json -d dir_for_files -o output.json
================================================
FILE: docs/cookbook/generate_ligand_network.nblink
================================================
{
"path": "../ExampleNotebooks/cookbook/generate_ligand_network.ipynb"
}
================================================
FILE: docs/cookbook/hand_write_ligand_network.nblink
================================================
{
"path": "../ExampleNotebooks/cookbook/hand_write_ligand_network.ipynb"
}
================================================
FILE: docs/cookbook/index.rst
================================================
.. _cookbooks:
Cookbook
========
This section describes common tasks involving the OpenFE Python API.
The :any:`OpenFE CLI` provides a simple way to perform the most common procedures for free energy calculations, but does not provide much flexibility for fine-tuning your approach or combining OpenFE with other tools. The :any:`Python API` allows that flexibility, but using it is more complex. This cookbook breaks down common steps that would be implemented in Python to help navigate that complexity.
.. note:: This section is a work-in-progress.
.. module:: openfe
:noindex:
The Basic Workflow
------------------
The typical way to use the Python API is to load a number of molecules you want to calculate free energies of, construct a :class:`LigandNetwork` connecting them in an efficient way, and then combine that with information for how each ligand should be simulated to construct an :class:`AlchemicalNetwork`, which specifies the entire simulation campaign. This provides a lot of flexibility in how molecules are specified, mapped, connected, and simulated, without exposing a great deal of complexity. OpenFE recommends this workflow for most users.
.. container:: deflist-flowchart
* Setup
- .. container:: flowchart-sidebyside
- -
.. rst-class:: flowchart-spacer
-
.. rst-class:: arrow-down arrow-from-nothing
- :any:`choose_protocol`
- :class:`Protocol`
Simulation procedure for an alchemic mutation.
.. rst-class:: arrow-down arrow-tail arrow-combine-right
-
- -
.. rst-class:: width-8
- Chemical component definition
SDF, PDB, RDKit, OpenFF Molecule, solvent spec, etc.
- .. container:: flowchart-sidebyside
- .. rst-class:: width-3
-
.. rst-class:: arrow-down arrow-multiple
- :any:`Loading proteins`, :any:`Defining solvents`
- :class:`SolventComponent` and :class:`ProteinComponent`
Other chemical components needed to simulate the ligand.
.. rst-class:: arrow-down arrow-multiple arrow-tail arrow-combine
-
- - .. container:: flowchart-sidebyside
- .. rst-class:: width-5
-
.. rst-class:: arrow-down arrow-multiple
- :any:`Loading small molecules`
- :class:`SmallMoleculeComponent`
The ligands that will be mutated.
- .. rst-class:: width-3
-
.. rst-class:: flowchart-spacer
-
- Orion/FEP+
Network from another tool.
- .. container:: flowchart-sidebyside
- .. rst-class:: width-2
-
.. rst-class:: arrow-down arrow-multiple
- :any:`generate_ligand_network`
- .. rst-class:: width-2
-
.. rst-class:: arrow-down arrow-multiple
- :any:`hand_write_ligand_network`
- .. rst-class:: width-1
-
.. rst-class:: arrow-down arrow-tail arrow-multiple arrow-combine-right
-
.. rst-class:: flowchart-spacer
-
- .. rst-class:: width-3
-
.. rst-class:: arrow-down arrow-tail arrow-combine-left
-
.. rst-class:: arrow-down arrow-head flowchart-spacer
- :any:`network_from_orion_fepp`
- :class:`LigandNetwork`
A network of ligand transformations.
- .. container:: flowchart-sidebyside
- -
.. rst-class:: arrow-down arrow-tail arrow-combine-left width-4
-
- -
.. rst-class:: arrow-cycle width-4
-
- :any:`ligandnetwork_vis`
.. rst-class:: arrow-down arrow-head
- :any:`create_alchemical_network`
- :class:`AlchemicalNetwork`
A complete simulation campaign.
.. rst-class:: arrow-down
* :any:`dumping_transformations`
* Run
- :any:`openfe quickrun <cli_quickrun>`
OpenFE recommends using the ``openfe quickrun`` CLI command to execute a transformation.
.. rst-class:: arrow-down
*
* Gather
- :any:`openfe gather <cli_gather>`
OpenFE recommends using the ``openfe gather`` CLI command to collect the results of a transformation.
List of Cookbooks
-----------------
.. toctree::
:maxdepth: 1
loading_molecules
dumping_transformation
jq_inspection
choose_protocol
generate_ligand_network
rfe_alchemical_planners
network_from_orion_fepp
hand_write_ligand_network
ligandnetwork_vis
create_alchemical_network
user_charges
bespoke_parameters
================================================
FILE: docs/cookbook/jq_inspection.rst
================================================
.. _jq_inspection:
Using ``jq`` to inspect OpenFE JSONs
==============================================
Sometimes you may want to get a sense of the contents of JSON files, but the files are too unwieldy to inspect one-by-one in a code editor.
`jq <https://jqlang.org/>`_ is a helpful command-line tool that we recommend for quickly inspecting JSON files.
Below is a common use-case to get you started, but you can do much more by checking out the `jq manual <https://jqlang.org/manual/>`_.
To view all the top-level JSON keys, use ``jq "keys" filename.json``, for example with a results JSON from the tutorial:
.. code:: bash
$ jq "keys" rbfe_lig_ejm_46_solvent_lig_jmc_28_solvent.json
[
"estimate",
"protocol_result",
"uncertainty",
"unit_results"
]
.. note::
You can use ``"keys[]"`` instead of ``"keys"`` for a cleaner output.
Now that you know ``estimate`` is at the top-level of the JSON, you can use the following pattern to see the next level of keys:
.. code:: bash
$ jq ".estimate | keys " rbfe_lig_ejm_46_solvent_lig_jmc_28_solvent.json
[
":is_custom:",
"magnitude",
"pint_unit_registry",
"unit"
]
If you want to show all the keys *and* their values, simply omit ``| keys`` from the query:
.. code:: bash
$ jq ".estimate" rbfe_lig_ejm_46_solvent_lig_jmc_28_solvent.json
{
"magnitude": 23.347074789078682,
"unit": "kilocalorie / mole",
":is_custom:": true,
"pint_unit_registry": "openff_units"
}
This can be very helpful for quickly checking results for many files, for example:
.. code:: bash
$ jq ".estimate.magnitude" rbfe*.json
-14.925911852820793
-40.72063957254803
-27.76541486479537
-16.023754604070007
-57.38608716292447
-15.748326155729705
-39.933880531487326
-27.780933075807425
-16.76023951588401
-58.36294851896545
-19.038006312251575
-20.26856586311034
17.338257573349775
15.775784163095102
23.134622420900932
17.071712542470248
15.873122071409249
23.347074789078682
================================================
FILE: docs/cookbook/ligandnetwork_vis.nblink
================================================
{
"path": "../ExampleNotebooks/cookbook/ligandnetwork_vis.ipynb"
}
================================================
FILE: docs/cookbook/loading_molecules.nblink
================================================
{
"path": "../ExampleNotebooks/cookbook/loading_molecules.ipynb"
}
================================================
FILE: docs/cookbook/network_from_orion_fepp.nblink
================================================
{
"path": "../ExampleNotebooks/cookbook/network_from_orion_fepp.ipynb"
}
================================================
FILE: docs/cookbook/rfe_alchemical_planners.nblink
================================================
{
"path": "../ExampleNotebooks/cookbook/rfe_alchemical_planners.ipynb"
}
================================================
FILE: docs/cookbook/user_charges.nblink
================================================
{
"path": "../ExampleNotebooks/cookbook/user_charges.ipynb"
}
================================================
FILE: docs/environment.yaml
================================================
name: openfe-docs
channels:
- https://conda.anaconda.org/conda-forge
# explicit pins to speed up build:
dependencies:
- autodoc-pydantic >= 2.1
- docutils == 0.20
- gitpython
- libsass
- myst-parser
- nbsphinx
- nbsphinx-link
- openff-toolkit-base == 0.17.0
- openff-units == 0.3.1
- openmm == 8.3.1
- packaging
- pip
- plugcli >= 0.2.1
- python
- pydantic >=2.0.0, <2.12.0 # https://github.com/openforcefield/openff-interchange/issues/1346
- sphinx ==7.2.6 # TODO: debug "duplicate object" warning with later versions
- sphinx-click
- sphinx-design
- sphinx-toolbox
- threadpoolctl
- tqdm
- pip:
- git+https://github.com/OpenFreeEnergy/gufe@main
- git+https://github.com/OpenFreeEnergy/ofe-sphinx-theme@v0.3.1
# pip install these so that we can make sure docs build on main while these packages' docs are under development
- git+https://github.com/OpenFreeEnergy/kartograf@main
- git+https://github.com/OpenFreeEnergy/konnektor@main
- git+https://github.com/OpenFreeEnergy/lomap@main
# These are added automatically by RTD, so we include them here
# for a consistent environment.
- mock
- pillow
# - sphinx
# - sphinx_rtd_theme
================================================
FILE: docs/guide/cli/cli_basics.rst
================================================
CLI basics
==========
The ``openfe`` command consists of several subcommands. This is similar to
tools like ``gmx``, which has subcommands like ``gmx mdrun``, or ``conda``,
which has subcommands like ``conda install``.
To get a list of the subcommands and their descriptions, call ``openfe`` (or
``openfe -h``):
.. TODO autogenerate using sphinxcontrib-programoutput
.. code:: none
Usage: openfe [OPTIONS] COMMAND [ARGS]...
This is the command line tool to provide easy access to functionality from
the OpenFE Python library.
Options:
--version Show the version and exit.
--log PATH logging configuration file
-h, --help Show this message and exit.
Network Planning Commands:
plan-rhfe-network Plan a relative hydration free energy network, saved as
JSON files for the quickrun command.
plan-rbfe-network Plan a relative binding free energy network, saved as
JSON files for the quickrun command.
view-ligand-network Visualize a ligand network
Quickrun Executor Commands:
gather Gather result jsons for network of RFE results into a TSV file
quickrun Run a given transformation, saved as a JSON file
Miscellaneous Commands:
fetch Fetch tutorial or other resource.
charge-molecules Generate partial charges for a set of molecules.
test Run the OpenFE test suite
The ``--log`` option takes a logging configuration file and sets that
logging behavior. If you use it, it must come before the subcommand name.
You can find out more about each subcommand by putting ``--help`` *after*
the subcommand name, e.g., ``openfe quickrun --help``, which returns
.. code:: none
Usage: openfe quickrun [OPTIONS] TRANSFORMATION
Run the transformation (edge) in the given JSON file.
Simulation JSON files can be created with the :ref:`cli_plan-rbfe-network`
or from Python a :class:`.Transformation` can be saved using its to_json
method::
transformation.to_json("filename.json")
That will save a JSON file suitable to be input for this command.
Running this command will execute the simulation defined in the JSON file,
creating a directory for each individual task (``Unit``) in the workflow.
For example, when running the OpenMM HREX Protocol a directory will be
created for each repeat of the sampling process (by default 3).
Options:
-d, --work-dir DIRECTORY Directory in which to store files in (defaults to
current directory). If the directory does not
exist, it will be created at runtime.
-o PATH Filepath at which to create and write the JSON-
formatted results.
--resume Attempt to resume this transformation's execution
using the cache.
-h, --help Show this message and exit.
For more details on various commands, see the :ref:`cli-reference`.
================================================
FILE: docs/guide/cli/cli_yaml.rst
================================================
.. _userguide_cli_yaml_interface:
Customising CLI planning with YAML settings
===========================================
The planning commands in the CLI can be made more powerful by supplying
YAML-formatted files to customise the planning algorithms.
This settings file has a series of sections for customising the different algorithms.
For example, the settings file which re-specifies the default behaviour would look like ::
network:
method: generate_minimal_spanning_network
mapper:
method: LomapAtomMapper
settings:
time: 1
threed: True
max3d: 0.95
element_change: True
partial_charge:
method: am1bcc
settings:
off_toolkit_backend: ambertools
The name of the algorithm is given behind the ``method:`` key and the arguments to the
algorithm are then optionally given behind the ``settings:`` key.
All sections of the file ``network:``, ``mapper:`` and ``partial_charge:`` are optional.
The settings YAML file is then provided to the ``-s`` option of ``openfe plan-rbfe-network``: ::
openfe plan-rbfe-network -M molecules.sdf -p protein.pdb -s settings.yaml
Customising the atom mapper
---------------------------
There is a choice to be made as to which atom mapper is used,
currently included are the :class:`.LomapAtomMapper` and the :class:`.KartografAtomMapper` (full details in the `Kartograf documentation`_.)
.. _Kartograf documentation: https://kartograf.readthedocs.io/en/latest/api/kartograf.mappers.html#kartograf.atom_mapper.KartografAtomMapper
For example, to switch to using the ``Kartograf`` atom mapper, this settings YAML could be used ::
mapper:
method: KartografAtomMapper
settings:
atom_max_distance: 0.95
atom_map_hydrogens: True
map_hydrogens_on_hydrogens_only: False
map_exact_ring_matches_only: True
Customising the network planner
-------------------------------
There are a variety of network planning options available, including
:func:`.generate_radial_network`,
:func:`.generate_minimal_spanning_network`, and
:func:`.generate_minimal_redundant_network`.
For example, to plan a radial network using a ligand called 'CHEMBL1078774' as the central ligand, this settings YAML
could be given ::
network:
method: generate_radial_network
settings:
central_ligand: CHEMBL1078774
Where the required ``central_ligand`` argument has been passed inside the ``settings:`` section.
Note that there is a subtle distinction when ligand names could be interpreted as integers.
To select the first ligand, the **integer** 0 can be given ::
network:
method: generate_radial_network
settings:
central_ligand: 0
Whereas if we wanted to specify the ligand named "0", we would instead explicitly pass this as **a string** to the YAML
settings file ::
network:
method: generate_radial_network
settings:
central_ligand: '0'
Customising the partial charge generation
-----------------------------------------
There are a range of partial charge generation schemes available, including
* ``am1bcc``
* ``am1bccelf10`` (only possible if ``off_toolkit_backend`` in settings is set to ``openeye``)
* ``nagl`` (must have ``openff-nagl`` installed)
* ``espaloma`` (must have ``espaloma_charge`` installed)
The following settings can also be set
* ``off_toolkit_backend`` The backend to use for partial charge generation. Choose from ``ambertools`` (default), ``openeye`` or ``rdkit``.
* ``number_of_conformers`` The number of conformers to use for partial charge generation. If unset (default), the input conformer will be used.
* ``nagl_model``: The NAGL model to use. If unset (default), the latest available production charge model will be used.
For example, to generate the partial charges using the ``am1bccelf10`` method from ``openeye`` the following should be added to the YAML settings file ::
partial_charge:
method: am1bccelf10
settings:
off_toolkit_backend: openeye
For more information on the different options, please refer to the :class:`.OpenFFPartialChargeSettings`.
================================================
FILE: docs/guide/cli/index.rst
================================================
.. _userguide_cli_interface:
CLI Interface
=============
In addition to the powerful Python API, OpenFE provides a simple command
line interface to facilitate some more common (and less complicated) tasks.
The Python API tries to be as easy to use as possible, but the CLI provides
wrappers around some parts of the Python API to make it easier to integrate
into non-Python workflows.
.. toctree::
cli_basics
cli_yaml
================================================
FILE: docs/guide/execution/execution_theory.rst
================================================
.. _userguide_execution_theory:
Protocols and the Execution Model Theory
========================================
Protocols in OpenFE are built on a flexible execution model.
Result objects are shaped by this model, and therefore some basic
background on it can be useful when looking into the details of simulation
results. In general, most users don't need to work with the details of the
execution model, but the general ideas can be useful.
.. TODO figure showing an example dag
Each protocol involves a number of steps (called ``ProtocolUnit``\ s) which occur in
some order. Formally, this is described by a directed acyclic graph (DAG),
so the collection of steps to run is called a ``ProtocolDAG``. A
:class:`.Protocol` creates the ``ProtocolDAG``, and a single ``ProtocolDAG``
should give information necessary to obtain an estimate of the desired
thermodynamic observable. Over the course of a campaign, a single
:class:`.Protocol` may create multiple ``ProtocolDAG``\ s, e.g., to extend a
simulation. NB: While independent runs can be created as separate
``ProtocolDAG``\ s, the recommended way to do independent runs is as a
``repeats`` part of the settings for the protocol, which puts the
independent runs in a single ``ProtocolDAG``.
.. TODO review recommendation for repeats in context of NEQ protocol
There are results objects at each level of this: so the
:class:`.ProtocolResult` is associated with the :class:`.Protocol`. Just as
the :class:`.Protocol` may create one or more ``ProtocolDAG``\ s, the
:class:`.ProtocolResult` will be made from one or more
:class:`.ProtocolDAGResult`\ s. Finally, each :class:`.ProtocolDAGResult`
may carry information about multiple :class:`.ProtocolUnitResult`\ s, just as a
single ``ProtocolDAG`` may involve multiple ``ProtocolUnit``\ s.
.. TODO FUTURE: figure showing the relations of protocol objects and result
objects
.. TODO FUTURE: add information about scratch/shared/permanent storage
once that becomes relevant
================================================
FILE: docs/guide/execution/index.rst
================================================
.. _userguide_execution:
Execution
=========
With a :class:`.Transformation` defined, the next step is to execute this.
The easiest way to run it is to use the :ref:`quickrun CLI tool <userguide_quickrun>`.
More advanced options are available through first considering the
:ref:`theory of the execution model <userguide_execution_theory>`
then :ref:`reading on the available Python functions`.
.. toctree::
quickrun_execution
execution_theory
================================================
FILE: docs/guide/execution/quickrun_execution.rst
================================================
.. _userguide_quickrun:
Execution with Quickrun
=======================
The planning and preparation of a campaign of alchemical simulations using ``openfe`` is intended to be achievable on a local workstation in a matter of minutes.
The *execution* of these simulations however requires a large amount of computational power, and beyond running single calculations locally, is intended to be distributed across an HPC environment.
Doing this requires storing and sending the details of the simulation from the local workstation to an HPC environment, which can be done via the :func:`.Transformation.to_json` function which :ref:`creates a saved JSON version of the data <dumping_transformations>`.
These serialized JSON files are the currency of executing a campaign of simulations and contain all the information required to execute a single simulation.
To read the ``Transformation`` information and execute the simulation, the command line interface provides the ``openfe quickrun`` command, the full details of which are given in :ref:`the CLI reference section <cli_quickrun>`.
Basic quickrun usage
--------------------
The ``quickrun`` command takes in the ``Transformation`` information represented as JSON, then executes a simulation according to those specifications.
For example, the following command executes a simulation defined by ``transformation.json`` and produces a results file named ``results.json``.
::
> openfe quickrun transformation.json -d workdir/ -o workdir/results.json
The ``-d`` / ``--work-dir`` flag controls where working files (checkpoints, trajectory data, etc...) are written.
If it is omitted, the current directory will be used.
The ``-o`` flag controls where the results file will be written.
If it is omitted, results are written to a file named ``<key>_results.json`` in the working directory, where ``<key>`` is a unique identifier.
Resuming a halted job
---------------------
When ``openfe quickrun`` starts, it saves a plan of the simulation to a cache file before execution begins:
.. code:: bash
<work-dir>/quickrun_cache/dag-cache-<hash>.json
Where ``<work-dir>`` is the working directory and ``<hash>`` is a unique identifier based on the ``-o`` file path and Transformation.
This cache is automatically removed once the job completes.
If a job is interrupted (e.g. due to a wall-time limit, node failure, or manual cancellation), you can resume the interrupted job by passing the ``--resume`` flag:
.. code:: bash
> openfe quickrun transformation.json -d workdir/ -o workdir/results.json --resume
The planned simulation cache will be used to identify where in the simulation process it left off and, if supported by the Transformation Protocol, how to resume.
.. note::
The same ``-d`` / ``--work-dir`` and ``-o`` flag arguments used in the
original run must be specified so that ``quickrun`` can locate the cache file.
If you pass ``--resume`` but no cache file is found (e.g. the job never started), the following warning is printed and a fresh execution begins.
.. code:: bash
openfe quickrun was run with --resume, but no cached results found at
<work-dir>/quickrun_cache/dag-cache-<hash>.json. Starting new execution.
If the cache file is corrupted (e.g. due to an incomplete write at the moment of interruption), ``quickrun --resume`` will raise an error with instructions to rerun the simulation:
.. code:: bash
Recovery failed, please remove <work-dir>/quickrun_cache/dag-cache-<hash>.json
before executing a new transformation simulation.
If you do not pass the ``--resume`` flag, the code will detect the partially complete transformation and prevent you from accidentally starting a duplicate run.
The following error will be raised:
.. code:: bash
Transformation has been started but is incomplete. Please remove
<work-dir>/quickrun_cache/dag-cache-<hash>.json and rerun, or resume
execution using the ``--resume`` flag.
Executing within a job submission script
----------------------------------------
You may need to submit computational jobs to a queueing engine, such as Slurm.
The ``openfe quickrun`` command can be used within a submission script as follows:
.. code-block:: bash
#!/bin/bash
#SBATCH --job-name="openfe job"
#SBATCH --mem-per-cpu=2G
# activate an appropriate conda environment, or run any "module load" commands required
conda activate openfe_env
openfe quickrun transformation.json -d workdir/ -o workdir/results.json
Parallel execution of repeats with Quickrun
===========================================
Serial execution of multiple repeats of a transformation can be inefficient when simulation times are long.
Higher throughput can be achieved with parallel execution by running one repeat per HPC job.
Most protocols are set up to run three repeats in serial by default, but this can be changed by either:
1. Defining the protocol setting ``protocol_repeats`` - see the :ref:`protocol configuration guide ` for more details.
2. Using the ``openfe plan-rhfe-network`` (or ``plan-rbfe-network``) command line flag ``--n-protocol-repeats``.
Each transformation can then be executed multiple times via the ``openfe quickrun`` command to produce a set of repeats.
However, **you must use unique results files for each repeat to ensure they don't overwrite each other**.
We recommend using folders named ``results_x`` where x is 0-2 to store the repeated calculations as our :ref:`openfe gather <cli_gather>` command also supports this file structure.
Below is an example of a simple script that will create and submit a separate job script (``\*.job`` named file) for every alchemical transformation (for the simplest SLURM use case) in a network running each repeat in parallel and writing the results to a unique folder:
.. code-block:: bash
for file in network_setup/transformations/*.json; do
relpath="${file:30}" # strip off "network_setup/transformations/"
dirpath=${relpath%.*} # strip off final ".json"
jobpath="network_setup/transformations/${dirpath}.job"
if [ -f "${jobpath}" ]; then
echo "${jobpath} already exists"
exit 1
fi
for repeat in {0..2}; do
cmd="openfe quickrun ${file} -o results_${repeat}/${relpath} -d results_${repeat}/${dirpath}"
echo -e "#!/usr/bin/env bash\n${cmd}" > "${jobpath}"
sbatch "${jobpath}"
done
done
This should result in the following file structure after execution:
::
results_parallel
├── results_0
│ ├── rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json
│ ├── shared_HybridTopologyMultiStateAnalysisUnit-5e0825de1dd045818cdc3428205c1cf7_attempt_0
│ │ ├── forward_reverse_convergence.png
│ │ ├── ligand_RMSD.png
│ │ ├── mbar_overlap_matrix.png
│ │ ├── replica_exchange_matrix.png
│ │ ├── replica_state_timeseries.png
│ │ └── structural_analysis.npz
│ ├── shared_HybridTopologyMultiStateSimulationUnit-144be594cf024cb19152cfe5e0b3fb7d_attempt_0
│ │ ├── checkpoint.chk
│ │ ├── simulation.nc
│ │ └── simulation_real_time_analysis.yaml
│ └── shared_HybridTopologySetupUnit-01b5afe1972c4e2f9d0943da43b4b19c_attempt_0
│ ├── A_db.json
│ ├── B_db.json
│ ├── hybrid_positions.npy
│ ├── hybrid_system.pdb
│ └── hybrid_system.xml.bz2
├── results_1
│ ├── rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json
│ ├── shared_HybridTopologyMultiStateAnalysisUnit-7986bec616a74929aee85e900535f4a2_attempt_0
│ │ ├── forward_reverse_convergence.png
│ │ ├── ligand_RMSD.png
│ │ ├── mbar_overlap_matrix.png
│ │ ├── replica_exchange_matrix.png
│ │ ├── replica_state_timeseries.png
│ │ └── structural_analysis.npz
│ ├── shared_HybridTopologyMultiStateSimulationUnit-18eb295b7123444f9ac66ff3caffcab8_attempt_0
│ │ ├── checkpoint.chk
│ │ ├── simulation.nc
│ │ └── simulation_real_time_analysis.yaml
│ └── shared_HybridTopologySetupUnit-3d8ccb1ef5124bd4ba20e0047aad0b5f_attempt_0
│ ├── A_db.json
│ ├── B_db.json
│ ├── hybrid_positions.npy
│ ├── hybrid_system.pdb
│ └── hybrid_system.xml.bz2
└── results_2
├── rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json
├── shared_HybridTopologyMultiStateAnalysisUnit-ac5fad8ad1fb49598f80018713dce070_attempt_0
│ ├── forward_reverse_convergence.png
│ ├── ligand_RMSD.png
│ ├── mbar_overlap_matrix.png
│ ├── replica_exchange_matrix.png
│ ├── replica_state_timeseries.png
│ └── structural_analysis.npz
├── shared_HybridTopologyMultiStateSimulationUnit-73abea21b423444881bd8f21415c937f_attempt_0
│ ├── checkpoint.chk
│ ├── simulation.nc
│ └── simulation_real_time_analysis.yaml
└── shared_HybridTopologySetupUnit-79bc9b63321945338a3b69d9f94ee15b_attempt_0
├── A_db.json
├── B_db.json
├── hybrid_positions.npy
├── hybrid_system.pdb
└── hybrid_system.xml.bz2
The results can be gathered from the CLI using the ``openfe gather`` command. In this case, you should direct
it to the root directory that contains the repeat results, and it will automatically collate the information:
::
> openfe gather results_parallel
Optimizing GPU performance with NVIDIA MPS
==========================================
You can further optimize execution of ``openfe quickrun`` using NVIDIA's Multi-Process Service (MPS).
See NVIDIA's documentation on `MPS for OpenFE free energy calculations `_ for details.
See Also
--------
- :ref:`userguide_results` - details on inspecting these results.
- :ref:`cli-reference` - full CLI reference for ``openfe quickrun``
- :ref:`rbfe_cli_tutorial` - a tutorial on how to use the CLI to run hybrid topology relative binding free energy calculations.
================================================
FILE: docs/guide/index.rst
================================================
User\ |nbsp|\ Guide
===================
.. toctree::
:maxdepth: 2
introduction
setup/index
execution/index
results/index
cli/index
protocols/index
under_the_hood
troubleshooting
.. |nbsp| unicode:: 0xA0 .. non-breaking space
:trim:
================================================
FILE: docs/guide/introduction.rst
================================================
.. _guide-introduction:
Introduction
============
Here we present an overview of the workflow for calculating free energies in
OpenFE in the broadest strokes possible. This workflow is reflected in both
the Python API and in the command line interface, and so we have a section
for each.
Workflow overview
-----------------
The overall workflow of OpenFE involves three stages:
1. :ref:`Simulation setup `: Defining the simulation campaign you are going to run.
2. :ref:`Execution <userguide_execution>`: Running and performing initial analysis of your
simulation campaign.
3. :ref:`Gather results <userguide_results>`: Assembling the results from the simulation
campaign for further analysis.
In many use cases, these stages may be done on different machines. For
example, you are likely to make use of HPC or cloud computing resources to
run the simulation campaign. Because of this, each stage has a defined output which
is then the input for the next stage:
.. TODO make figure
.. .. figure:: ???
:alt: Setup -> (AlchemicalNetwork) -> Execution -> (ProtocolResults) -> Gather
The main stages of a free energy calculation in OpenFE, and the intermediates between them.
The output of the :ref:`simulation setup ` stage is an :class:`.AlchemicalNetwork`. This contains all
the information about what is being simulated (e.g., what ligands, host proteins, solvation details, etc.) and the
information about how to perform the simulation (the Protocol).
The output of the :ref:`execution <userguide_execution>` stage is the basic results from each edge.
This can depend on the specific analysis intended, but will either involve a
:class:`.ProtocolResult` representing the calculated :math:`\Delta G` for
each edge or the :class:`.ProtocolDAGResult` linked to the data needed to
calculate that :math:`\Delta G`.
The :ref:`gather results <userguide_results>` stage aggregates the individual results for further analysis. For example, the CLI's ``gather`` command will create a
table of the :math:`\Delta G` for each leg.
For more workflow details, see :ref:`under-the-hood`.
.. TODO: Should the CLI workflow be moved to under "CLI Interface"?
CLI Workflow
------------
We have separate CLI commands for each stage of setup, execution, and
gathering results. With the CLI, the Python objects of
:class:`.AlchemicalNetwork` and :class:`.ProtocolResult` are stored to disk
in an intermediate representation between the commands.
.. TODO make figure
.. .. figure:: ???
:alt: [NetworkPlanner -> AlchemicalNetwork] -> Transformation JSON -> quickrun -> Result JSON -> gather
The CLI workflow, with intermediates. The setup stage uses a network
planner to generate the network, before saving each transformation as a
JSON file.
The commands used to generate an :class:`.AlchemicalNetwork` using the CLI are:
* :ref:`cli_plan-rbfe-network`
* :ref:`cli_plan-rhfe-network`
.. note::
To ensure a consistent set of partial charges are used for each molecule across different transformations, the CLI
network planners will now automatically generate charges ahead of planning the network. The partial charge generation
scheme can be configured using the :ref:`YAML settings <userguide_cli_yaml_interface>`. We also provide tooling to
generate the partial charges as a separate CLI step which can be run before network planning, see the :ref:`tutorial `
for more details.
For example, you can create a relative binding free energy (RBFE) network using
.. code:: bash
$ openfe plan-rbfe-network -p protein.pdb -M dir_with_sdfs/
This will save the alchemical network represented as a JSON file for each
edge of the :class:`.AlchemicalNetwork` (i.e., each leg of the alchemical cycle).
To run a given transformation, use the :ref:`cli_quickrun`; for example:
.. code:: bash
$ openfe quickrun mytransformation.json -d dir_for_files -o output.json
In many cases, you will want to create a job script for a queuing system
(e.g., SLURM) that wraps that command. You can do this for all JSON files
from the network planning command with something like this:
.. TODO Link to example here. I think this is waiting on the CLI example
being merged into example notebooks?
Finally, assuming all results (and only results) are in the ``results/`` directory,
use the :ref:`cli_gather` to generate a summary table:
.. code:: bash
$ openfe gather ./results/ -o final_results.tsv
This will output a tab-separated file with the ligand pair, the estimated
:math:`\Delta G` and the uncertainty in that estimate.
The CLI provides a very straightforward user experience that works with the
most simple use cases. For use cases that need more workflow customization,
the Python API makes it relatively straightforward to define exactly the
simulation you want to run. The next sections of this user guide will
illustrate how to customize the behavior to your needs.
================================================
FILE: docs/guide/protocols/absolutebinding.rst
================================================
.. _userguide_abfe_protocol:
Absolute Binding Protocol
=========================
Overview
--------
The :class:`AbsoluteBindingProtocol <.AbsoluteBindingProtocol>` calculates the absolute binding free energy,
which is the free energy difference between a ligand in solution and the ligand bound to a protein.
The absolute binding free energy is calculated through a thermodynamic cycle.
In this cycle, the interactions of the molecule are decoupled, meaning turned off,
using a partial annihilation scheme (see below) both in the solvent and in the complex phases.
Restraints are required to keep the weakly
coupled and fully decoupled ligand in the binding site region and thereby reduce the phase
space that needs to be sampled. In the :class:`AbsoluteBindingProtocol <.AbsoluteBindingProtocol>`
we apply orientational, or Boresch-style, restraints, as described below.
The absolute binding free energy is then obtained via summation of free energy differences along the thermodynamic cycle.
.. figure:: img/abfe-cycle.png
:scale: 50%
Thermodynamic cycle for the absolute binding free energy protocol.
Scientific Details
------------------
Orientational restraints
~~~~~~~~~~~~~~~~~~~~~~~~
Orientational, or Boresch-style, restraints are automatically (unless manually specified) applied between three
protein and three ligand atoms using one bond, two angle, and three dihedral restraints.
Reference atoms are picked based on different criteria, such as the root mean squared
fluctuation of the atoms in a short MD simulation, the secondary structure of the protein,
and the distance between atoms, based on heuristics from Baumann et al. [1]_ and Alibay et al. [2]_.
Two strategies for selecting protein atoms are available, either picking atoms that are bonded to each other or that can span multiple residues.
This can be specified using the ``restraint_settings.anchor_finding_strategy`` settings.
Partial annihilation scheme
~~~~~~~~~~~~~~~~~~~~~~~~~~~
In the :class:`.AbsoluteBindingProtocol` the coulombic interactions of the molecule are fully turned off (annihilated).
The Lennard-Jones interactions are instead decoupled, meaning the intermolecular interactions are turned off, keeping the intramolecular Lennard-Jones interactions.
The lambda schedule
~~~~~~~~~~~~~~~~~~~
Molecular interactions are turned off during an alchemical path using a discrete set of lambda windows.
For the transformation in the binding site, the following steps are carried out, starting with the ligand fully interacting in the binding site.
1. Restrain the ligand using orientational restraints.
2. Turn off the electrostatic interactions of the ligand.
3. Decouple Lennard-Jones interactions of the ligand.
4. Release the restraints of the now dummy ligand analytically.
The lambda schedule in the solvent phase is similar to the one in the complex, except that no restraints are applied.
A soft-core potential is applied to the Lennard-Jones potential to avoid instabilities in intermediate lambda windows.
The soft-core potential function from Beutler et al. [3]_ is used by default.
The lambda schedule is defined in the ``lambda_settings`` objects ``lambda_elec``, ``lambda_vdw``, and ``lambda_restraints``.
Simulation overview
~~~~~~~~~~~~~~~~~~~
The :class:`.ProtocolDAG` of the :class:`.AbsoluteBindingProtocol` contains :class:`.ProtocolUnit`\ s from both the complex and solvent transformations.
This means that both legs of the thermodynamic cycle are constructed and run concurrently in the same :class:`.ProtocolDAG`.
This is different from the :class:`.RelativeHybridTopologyProtocol` where the :class:`.ProtocolDAG` only runs a single leg of a thermodynamic cycle.
If multiple ``protocol_repeats`` are run (default: ``protocol_repeats=3``), the :class:`.ProtocolDAG` contains multiple :class:`.ProtocolUnit`\ s of both complex and solvent transformations.
Simulation steps
""""""""""""""""
Each :class:`.ProtocolUnit` (whether complex or solvent) carries out the following steps:
1. Parameterize the system using `OpenMMForceFields `_ and `Open Force Field `_.
2. Equilibrate the fully interacting system using a short MD simulation using the same approach as the :class:`.PlainMDProtocol` (including rounds of NVT and NPT equilibration).
3. Create an alchemical system.
4. Add orientational restraints to the complex system.
5. Minimize the alchemical system.
6. Equilibrate and production simulate the alchemical system using the chosen multistate sampling method (under NPT conditions).
7. Analyze results for the transformation.
.. note:: Three different types of multistate sampling (i.e. replica swapping between lambda states) methods can be chosen: HREX, SAMS, and independent (no lambda swaps attempted).
By default the HREX approach is selected; this can be altered using ``solvent_simulation_settings.sampler_method`` or ``complex_simulation_settings.sampler_method`` (default: ``repex``).
Simulation details
""""""""""""""""""
Here are some details of how the simulation is carried out which are not detailed in the :class:`.AbsoluteBindingSettings`:
* The protocol applies a `LangevinMiddleIntegrator `_ which uses Langevin dynamics, with the LFMiddle discretization [4]_.
* A MonteCarloBarostat is used in the NPT ensemble to maintain constant pressure.
Getting the free energy estimate
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The free energy differences are obtained from simulation data using the `MBAR estimator `_ (multistate Bennett acceptance ratio estimator) as implemented in the `PyMBAR package `_.
Both the MBAR estimates of the two legs of the thermodynamic cycle, and the overall absolute binding free energy (of the entire cycle) are obtained,
which is different compared to the results in the :class:`.RelativeHybridTopologyProtocol` where results from two legs of the thermodynamic cycle are obtained separately.
In addition to the estimates of the free energy changes and their uncertainty, the protocol also returns some metrics to help assess convergence of the results, these are detailed in the :ref:`multistate analysis section `.
See Also
--------
**Setting up AFE calculations**
* :ref:`Defining the Protocol `
**Tutorials**
* :any:`Absolute Binding Free Energies tutorial <../../tutorials/abfe_tutorial>`
**Cookbooks**
:ref:`Cookbooks `
**API Documentation**
* :ref:`OpenMM Absolute Binding Free Energy `
* :ref:`OpenMM Protocol Settings `
References
----------
* `pymbar `_
* `yank `_
* `OpenMMTools `_
* `OpenMM `_
.. [1] Broadening the Scope of Binding Free Energy Calculations Using a Separated Topologies Approach, H. Baumann, E. Dybeck, C. McClendon, F. Pickard IV, V. Gapsys, L. Pérez-Benito, D. Hahn, G. Tresadern, A. Mathiowetz, D. Mobley, J. Chem. Theory Comput., 2023, 19, 15, 5058–5076
.. [2] Evaluating the use of absolute binding free energy in the fragment optimisation process, I. Alibay, A. Magarkar, D. Seeliger, P. Biggin, Commun Chem 5, 105 (2022)
.. [3] Avoiding singularities and numerical instabilities in free energy calculations based on molecular simulations, T.C. Beutler, A.E. Mark, R.C. van Schaik, P.R. Greber, and W.F. van Gunsteren, Chem. Phys. Lett., 222 529–539 (1994)
.. [4] Unified Efficient Thermostat Scheme for the Canonical Ensemble with Holonomic or Isokinetic Constraints via Molecular Dynamics, Zhijun Zhang, Xinzijian Liu, Kangyu Yan, Mark E. Tuckerman, and Jian Liu, J. Phys. Chem. A 2019, 123, 28, 6056-6079
================================================
FILE: docs/guide/protocols/absolutesolvation.rst
================================================
Absolute Solvation Protocol
===========================
Overview
--------
The :class:`AbsoluteSolvationProtocol <.AbsoluteSolvationProtocol>` calculates the free energy change
associated with transferring a molecule from vacuum into a solvent.
.. note::
Currently, water is the only supported solvent; however, more solvents might be possible in the future.
The absolute solvation free energy is calculated through a thermodynamic cycle.
In this cycle, the interactions of the molecule are decoupled, meaning turned off, using a partial annihilation scheme (see below) both in the solvent and in the vacuum phases.
The absolute solvation free energy is then obtained via summation of free energy differences along the thermodynamic cycle.
.. figure:: img/ahfe_thermocycle.png
:scale: 80%
Thermodynamic cycle for the absolute solvation free energy protocol.
Scientific Details
------------------
Partial annihilation scheme
~~~~~~~~~~~~~~~~~~~~~~~~~~~
In the :class:`.AbsoluteSolvationProtocol` the coulombic interactions of the molecule are fully turned off (annihilated).
The Lennard-Jones interactions are instead decoupled, meaning the intermolecular interactions are turned off, keeping the intramolecular Lennard-Jones interactions.
The lambda schedule
~~~~~~~~~~~~~~~~~~~
Molecular interactions are turned off during an alchemical path using a discrete set of lambda windows. The electrostatic interactions are turned off first, followed by the decoupling of the Lennard-Jones interactions.
A soft-core potential is applied to the Lennard-Jones potential to avoid instabilities in intermediate lambda windows.
Both the soft-core potential functions from Beutler et al. [1]_ and from Gapsys et al. [2]_ are available and can be specified in the ``alchemical_settings.softcore_LJ`` settings
(default: ``gapsys``).
The lambda schedule is defined in the ``lambda_settings`` objects ``lambda_elec`` and ``lambda_vdw``. Note that the ``lambda_restraints`` setting is ignored for the :class:`.AbsoluteSolvationProtocol`.
Simulation overview
~~~~~~~~~~~~~~~~~~~
The :class:`.ProtocolDAG` of the :class:`.AbsoluteSolvationProtocol` contains :class:`.ProtocolUnit`\ s from both the vacuum and solvent transformations.
This means that both legs of the thermodynamic cycle are constructed and run concurrently in the same :class:`.ProtocolDAG`. This is different from the :class:`.RelativeHybridTopologyProtocol` where the :class:`.ProtocolDAG` only runs a single leg of a thermodynamic cycle.
If multiple ``protocol_repeats`` are run (default: ``protocol_repeats=3``), the :class:`.ProtocolDAG` contains multiple :class:`.ProtocolUnit`\ s of both vacuum and solvent transformations.
Simulation steps
""""""""""""""""
Each :class:`.ProtocolUnit` (whether vacuum or solvent) carries out the following steps:
1. Parameterize the system using `OpenMMForceFields `_ and `Open Force Field `_.
2. Equilibrate the fully interacting system using a short MD simulation using the same approach as the :class:`.PlainMDProtocol` (in the solvent leg this will include rounds of NVT and NPT equilibration).
3. Create an alchemical system.
4. Minimize the alchemical system.
5. Equilibrate and production simulate the alchemical system using the chosen multistate sampling method (under NPT conditions if solvent is present).
6. Analyze results for the transformation.
Note: three different types of multistate sampling (i.e. replica swapping between lambda states) methods can be chosen: HREX, SAMS, and independent (no lambda swaps attempted). By default the HREX approach is selected; this can be altered using ``solvent_simulation_settings.sampler_method`` or ``vacuum_simulation_settings.sampler_method`` (default: ``repex``).
Simulation details
""""""""""""""""""
Here are some details of how the simulation is carried out which are not detailed in the :class:`.AbsoluteSolvationSettings`:
* The protocol applies a `LangevinMiddleIntegrator `_ which uses Langevin dynamics, with the LFMiddle discretization [3]_.
* A MonteCarloBarostat is used in the NPT ensemble to maintain constant pressure.
Getting the free energy estimate
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The free energy differences are obtained from simulation data using the `MBAR estimator `_ (multistate Bennett acceptance ratio estimator) as implemented in the `PyMBAR package `_.
Both the MBAR estimates of the two legs of the thermodynamic cycle, and the overall absolute solvation free energy (of the entire cycle) are obtained,
which is different compared to the results in the :class:`.RelativeHybridTopologyProtocol` where results from two legs of the thermodynamic cycle are obtained separately.
In addition to the estimates of the free energy changes and their uncertainty, the protocol also returns some metrics to help assess convergence of the results, these are detailed in the :ref:`multistate analysis section `.
.. todo: issue 792 change this reference to point to the new results section
See Also
--------
**Setting up AFE calculations**
* :ref:`Defining the Protocol `
..
To be added: Setting up AHFE calculations
**Tutorials**
* :any:`Absolute Hydration Free Energies tutorial <../../tutorials/ahfe_tutorial>`
**Cookbooks**
:ref:`Cookbooks `
**API Documentation**
* :ref:`OpenMM Absolute Solvation Free Energy `
* :ref:`OpenMM Protocol Settings `
References
----------
* `pymbar `_
* `yank `_
* `OpenMMTools `_
* `OpenMM `_
.. [1] Avoiding singularities and numerical instabilities in free energy calculations based on molecular simulations, T.C. Beutler, A.E. Mark, R.C. van Schaik, P.R. Greber, and W.F. van Gunsteren, Chem. Phys. Lett., 222 529–539 (1994)
.. [2] New Soft-Core Potential Function for Molecular Dynamics Based Alchemical Free Energy Calculations, V. Gapsys, D. Seeliger, and B.L. de Groot, J. Chem. Theor. Comput., 8 2373-2382 (2012)
.. [3] Unified Efficient Thermostat Scheme for the Canonical Ensemble with Holonomic or Isokinetic Constraints via Molecular Dynamics, Zhijun Zhang, Xinzijian Liu, Kangyu Yan, Mark E. Tuckerman, and Jian Liu, J. Phys. Chem. A 2019, 123, 28, 6056-6079
================================================
FILE: docs/guide/protocols/index.rst
================================================
.. _userguide_protocols:
Details of Specific Protocols
=============================
Details on the theory and behaviour of different Protocols are listed here.
.. toctree::
relativehybridtopology
absolutebinding
absolutesolvation
septop
plainmd
================================================
FILE: docs/guide/protocols/plainmd.rst
================================================
Plain MD Protocol
=================
Overview
--------
The :class:`.PlainMDProtocol` enables the user to run a Molecular Dynamics (MD) simulation of a :class:`.ChemicalSystem`, which can contain e.g. a solvated protein-ligand complex, a molecule and water, or a molecule in vacuum.
.. todo: Later add ref to ChemicalSystem section
Scientific Details
------------------
The :class:`.PlainMDProtocol` runs MD simulations of a system either in solvent or vacuum, depending on the input provided by the user in the :class:`.ChemicalSystem`.
The protocol applies a
`LangevinMiddleIntegrator `_
which uses Langevin dynamics, with the LFMiddle discretization [1]_.
Simulation Steps and Outputs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If there is a ``SolventComponent`` in the :class:`.ChemicalSystem`, then each :class:`.ProtocolUnit` carries out the following steps:
.. list-table::
:widths: 50 50
:header-rows: 1
* - Step
- Outputs (with default names)
* - 1. Parameterize the system using `OpenMMForceFields `_ and `Open Force Field `_
- Forcefield cache (``db.json``)
* - 2. OpenMM object creation
- Structure of the full system (``system.pdb``)
* - 3. Minimize the system
- Minimized Structure (``minimized.pdb``)
* - 4. Equilibrate in the canonical (NVT) ensemble
- NVT equilibrated structure (``equil_nvt.pdb``)
* - 5. Equilibrate the system under isobaric-isothermal (NPT) conditions
- NPT equilibrated structure (``equil_npt.pdb``)
* - 6. Production simulate the system under isobaric-isothermal (NPT) conditions
- Simulation trajectory (``simulation.xtc``), Checkpoint file (``checkpoint.chk``), Log output (``simulation.log``)
A MonteCarloBarostat is used in the NPT ensemble to maintain constant pressure.
Relevant settings under solvent conditions include the solvation settings that control the ``solvent_model`` and ``solvent_padding``.
If the :class:`.ChemicalSystem` does not contain a ``SolventComponent``, the protocol runs an MD simulation in vacuum. After a minimization, the protocol performs an equilibration, followed by a production run with no periodic boundary conditions and infinite cutoffs. Settings that control the barostat or the solvation are ignored for vacuum MD simulations.
Performance consideration for gas phase MD simulations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
For gas phase MD simulations, we suggest setting ``OPENMM_CPU_THREADS=1`` to obtain good performance.
See Also
--------
**Tutorials**
* :any:`MD tutorial <../../tutorials/md_tutorial>`
**API Documentation**
* :ref:`OpenMM plain MD protocol `
* :ref:`OpenMM Protocol Settings `
References
----------
* `OpenMMTools `_
* `OpenMM `_
.. [1] Unified Efficient Thermostat Scheme for the Canonical Ensemble with Holonomic or Isokinetic Constraints via Molecular Dynamics, Zhijun Zhang, Xinzijian Liu, Kangyu Yan, Mark E. Tuckerman, and Jian Liu, J. Phys. Chem. A 2019, 123, 28, 6056-6079
================================================
FILE: docs/guide/protocols/relativehybridtopology.rst
================================================
.. _userguide_relative_hybrid_topology_protocol:
Relative Hybrid Topology Protocol
=================================
Overview
--------
The relative free energy calculation approach calculates the difference in
free energy between two similar ligands. Depending on the :class:`.ChemicalSystem`
provided, the protocol either calculates the relative binding free energy
(RBFE), or the relative hydration free energy (RHFE).
For further information on constructing chemical systems to define thermodynamic cycles,
see :ref:`userguide_chemicalsystems_and_components`.
In a thermodynamic
cycle, one ligand is converted into the other ligand by alchemically
transforming the atoms that vary between the two ligands. The
transformation is carried out in both environments, meaning both in the
solvent (ΔG\ :sub:`solv`\) and in the binding site (ΔG\ :sub:`site`\) for RBFE calculations
and in the solvent (ΔG\ :sub:`solv`\) and vacuum (ΔG\ :sub:`vacuum`\) for RHFE calculations.
.. _label: Thermodynamic cycle for the relative binding free energy protocol
.. figure:: img/rbfe_thermocycle.png
:scale: 50%
Thermodynamic cycle for the relative binding free energy protocol.
Scientific Details
------------------
This :class:`.RelativeHybridTopologyProtocol` is based off the `Perses implementation `_.
The Hybrid Topology approach
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The :class:`.RelativeHybridTopologyProtocol` uses a hybrid topology approach to represent the two
ligands, meaning that a single set of coordinates is used to represent the
common core of the two ligands while the atoms that differ between the two
ligands are represented separately. An atom map defines which atoms belong
to the core (mapped atoms) and which atoms are unique and represented
separately (see :ref:`Creating atom mappings `). During the alchemical transformation, mapped atoms are interpolated
from their type in the ligand at state A to the type in the other ligand at state B, while unique atoms
(commonly known as dummy atoms) are switched, inserted or uncoupled, depending on which ligand they belong to. By default all nonbonded interactions between the
dummy region and the core region are removed to avoid coupling their motion.
.. note:: In this hybrid topology approach, all bonded interactions between the dummy region and the core region are kept.
As pointed out by Fleck et al. [1]_, this can lead to systematic errors if the contribution of the dummy group does not cancel out
in the thermodynamic cycle (no separability of the partition function). We are currently working on fixing this issue.
The lambda schedule
~~~~~~~~~~~~~~~~~~~
The protocol interpolates molecular interactions between the initial and final state of the perturbation using a discrete set of lambda windows. A function describes how the different lambda components (bonded and nonbonded terms) are interpolated.
Only parameters that differ between state A (``lambda=0``) and state B (``lambda=1``) are interpolated.
In the default lambda function in the :class:`.RelativeHybridTopologyProtocol`, first the electrostatic interactions of state A are turned off while simultaneously turning on the steric interactions of state B. Then, the steric interactions of state A are turned off while simultaneously turning on the electrostatic interactions of state B. Bonded interactions are interpolated linearly between ``lambda=0`` and ``lambda=1``. The ``lambda_settings`` ``lambda_functions`` and ``lambda_windows`` define the alchemical pathway.
A soft-core potential is applied to the Lennard-Jones potential to avoid instabilities in intermediate lambda windows.
Both the soft-core potential functions from Beutler et al. [2]_ and from Gapsys et al. [3]_ are available and can be specified in the ``alchemical_settings.softcore_LJ`` settings
(default: ``gapsys``).
Simulation overview
~~~~~~~~~~~~~~~~~~~
The :class:`.ProtocolDAG` of the :class:`.RelativeHybridTopologyProtocol` contains the :class:`.ProtocolUnit`\ s from one leg of the thermodynamic
cycle.
This means that each :class:`.ProtocolDAG` only runs a single leg of a thermodynamic cycle and therefore two Protocol instances need to be run to get the overall relative free energy difference, ΔΔG.
If multiple ``protocol_repeats`` are run (default: ``protocol_repeats=3``), the :class:`.ProtocolDAG` contains multiple :class:`.ProtocolUnit`\ s for that single leg of the thermodynamic cycle.
Simulation Steps
""""""""""""""""
Each :class:`.ProtocolUnit` carries out the following steps:
1. Parameterize the system using `OpenMMForceFields `_ and `Open Force Field `_.
2. Create an alchemical system (hybrid topology).
3. Minimize the alchemical system.
4. Equilibrate and production simulate the alchemical system using the chosen multistate sampling method (under NPT conditions if solvent is present).
.. note::
**Equilibration method:**
The current implementation uses a simple equilibration protocol **without any positional restraints** or **temperature annealing**.
The system is equilibrated directly under the target thermodynamic conditions, therefore the input structures should be stable under these conditions.
5. Analyze results for the transformation (for a single leg in the thermodynamic cycle).
Note: three different types of multistate sampling (i.e. replica swapping between lambda states) methods can be chosen: HREX, SAMS, and independent (no lambda swaps attempted). By default the HREX approach is selected; this can be altered using ``simulation_settings.sampler_method`` (default: ``repex``).
Simulation details
""""""""""""""""""
Here are some details of how the simulation is carried out which are not detailed in the :class:`.RelativeHybridTopologySettings`:
* The protocol applies a `LangevinMiddleIntegrator `_ which uses Langevin dynamics, with the LFMiddle discretization [4]_.
* A MonteCarloBarostat is used in the NPT ensemble to maintain constant pressure.
Getting the free energy estimate
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The free energy differences are obtained from simulation data using the `MBAR estimator `_ (multistate Bennett acceptance ratio estimator)
as implemented in the `PyMBAR package `_.
In addition to the MBAR estimates of the two legs of the thermodynamic cycle and the overall relative binding free energy difference,
the protocol also returns some metrics to help assess convergence of the results,
these are detailed in the :ref:`multistate analysis section `.
.. note:: The MBAR uncertainty of each individual transformation is estimated using bootstrapping for 1000 iterations,
this leads to larger errors compared to the previous error estimate method. The only exceptions are
the forward and reverse convergence plots which use the MBAR analytical error.
.. todo: issue 792, consolidate this page into its own analysis page and link both RBFE and AFE pages to it
.. _multistate_analysis:
Analysis
~~~~~~~~
As standard, some analysis of each simulation repeat is performed.
This analysis is made available through either the dictionary of results in the execution output,
or through some ready-made plots for quick inspection.
This analysis can be categorised as relating
to the energetics of the different lambda states that were sampled,
or to the analysis of the change in structural conformation over time in each state.
Energetic and replica exchange analysis
"""""""""""""""""""""""""""""""""""""""
These analyses consider the swapping and energetic overlap between the
different simulated states to help assess the convergence and correctness of the estimate of free energy
difference produced.
.. list-table:: Energetic Analysis examples
:widths: 75 25
:header-rows: 1
* - Description
- Example
* - **MBAR overlap matrix.**
This plot is used to assess if the different lambda states simulated overlapped energetically.
Each matrix element represents the probability of a sample from a given row state being observable in a given column
state.
Since the accuracy of the MBAR estimator depends on sufficient overlap between lambda states, this is a very
important metric.
This plot should show that the diagonal of the matrix has some "width" so that the two end states are connected,
with elements adjacent to the diagonal being at least 0.03 [5]_.
- .. image:: img/mbar_overlap_matrix.png
* - **Replica exchange probability matrix** (for replica exchange sampler simulations only).
Similar to the MBAR overlap matrix, this shows the probability of a given lambda state being exchanged with another.
Again, this matrix should be at least tridiagonal for the two end states to be connected.
- .. image:: img/replica_exchange_matrix.png
* - **Forward and reverse convergence of free energy estimates.**
Using increasingly larger portions of the total data,
this analysis calculates the free energy difference, both in forward and backward directions.
In this analysis, forward and backward estimates that agree within error using only a fraction of the total data
suggest convergence [5]_. Note: the error bars reported in this plot are
MBAR analytical errors instead of bootstrap errors.
- .. image:: img/forward_reverse_convergence.png
* - **Timeseries of replica states.**
This plot shows the time evolution of the different system configurations as they are
exchanged between different lambda states.
This plot should show that the states are freely mixing and that there are no cliques forming.
- .. image:: img/replica_state_timeseries.png
Structural analysis
"""""""""""""""""""
If a protein was present, these analyses first center and align the system so that
the protein is considered the frame of reference.
Further analysis can be performed by inspecting the ``simulation.nc`` and ``hybrid_system.pdb`` files,
which contain a multistate trajectory and topology for the hybrid system respectively.
These files can be loaded into an MDAnalysis Universe object using the `openfe_analysis`_ package.
.. list-table:: Structural Analysis examples
:widths: 75 25
:header-rows: 1
* - Description
- Example
* - **Ligand RMSD.**
This produces a plot called ``ligand_RMSD.png`` and a results entry ``ligand_RMSD`` which gives the
RMSD of the ligand molecule over time relative to the first frame of the production phase, for each simulated state.
Large RMSD values, e.g. greater than 5 angstrom (system dependent), would indicate an unstable ligand binding mode.
- .. image:: img/ligand_RMSD.png
* - **Ligand COM drift.**
For simulations with a protein present, this metric gives the total distance of the ligand COM
from its initial starting (docked) position. If this metric increases over the course of the simulation (beyond 5
angstrom) it indicates that the ligand drifted from the binding pocket, and the simulation is unreliable.
This produces a plot called ``ligand_COM_drift.png`` and a results entry ``ligand_COM_drift``.
- .. image:: img/ligand_COM_drift.png
* - **Protein 2D RMSD.**
For simulations with a protein present, this metric gives, for each lambda state, the RMSD of the
protein structure over time, using each frame analysed as a reference frame, to produce a 2 dimensional heatmap.
This plot should show no significant spikes in RMSD (which will appear as brightly coloured areas).
- .. image:: img/protein_2D_RMSD.png
See Also
--------
**Setting up RFE calculations**
* :ref:`Setting up alchemical networks `
**Tutorials**
* :any:`Relative Free Energies with the OpenFE CLI `
* :any:`Relative Free Energies with the OpenFE Python API `
**Cookbooks**
:ref:`Cookbooks `
**API Documentation**
* :ref:`OpenMM Relative Hybrid Topology Protocol `
* :ref:`OpenMM Protocol Settings `
References
----------
* `pymbar `_
* `perses `_
* `OpenMMTools `_
* `OpenMM `_
.. [1] Dummy Atoms in Alchemical Free Energy Calculations, Markus Fleck, Marcus Wieder, and Stefan Boresch, J. Chem. Theory Comput. 2021, 17, 4403−4419
.. [2] Avoiding singularities and numerical instabilities in free energy calculations based on molecular simulations, T.C. Beutler, A.E. Mark, R.C. van Schaik, P.R. Greber, and W.F. van Gunsteren, Chem. Phys. Lett., 222 529–539 (1994)
.. [3] New Soft-Core Potential Function for Molecular Dynamics Based Alchemical Free Energy Calculations, V. Gapsys, D. Seeliger, and B.L. de Groot, J. Chem. Theor. Comput., 8 2373-2382 (2012)
.. [4] Unified Efficient Thermostat Scheme for the Canonical Ensemble with Holonomic or Isokinetic Constraints via Molecular Dynamics, Zhijun Zhang, Xinzijian Liu, Kangyu Yan, Mark E. Tuckerman, and Jian Liu, J. Phys. Chem. A 2019, 123, 28, 6056-6079
.. [5] Guidelines for the analysis of free energy calculations, Pavel V. Klimovich, Michael R. Shirts, and David L. Mobley, J Comput Aided Mol Des. 2015 May; 29(5):397-411. doi: 10.1007/s10822-015-9840-9
.. _openfe_analysis: https://github.com/OpenFreeEnergy/openfe_analysis
================================================
FILE: docs/guide/protocols/septop.rst
================================================
.. _userguide_septop_protocol:
Separated Topologies Protocol
=============================
Overview
--------
The :class:`SepTopProtocol <.SepTopProtocol>` [1]_, [2]_ calculates the difference in binding free energy between two ligands.
This protocol essentially performs two absolute binding free energy calculations simultaneously in opposite directions,
by (alchemically) inserting one ligand into the binding site, while removing the other ligand at the same time.
In contrast to the :ref:`RelativeHybridTopologyProtocol `, the two ligand topologies are
completely separate (meaning there is no common core), making atom mapping unnecessary and allowing transformations between chemically diverse ligands.
The relative binding free energy is calculated through a thermodynamic cycle by transforming one ligand into the other ligand
both in the solvent and in the binding site.
Restraints are required to keep the weakly
coupled and fully decoupled ligand in the binding site region and thereby reduce the phase
space that needs to be sampled. In the :class:`SepTopProtocol <.SepTopProtocol>`
we apply orientational, or Boresch-style, restraints, as described below.
In this cycle, the interactions of one molecule are turned off while simultaneously turning on interactions of the other molecule both in the solvent and complex phases.
The relative binding free energy is then obtained via summation of free energy differences along the thermodynamic cycle.
.. figure:: img/septop_cycle.png
:scale: 50%
Thermodynamic cycle for the SepTop free energy protocol.
Scientific Details
------------------
Orientational restraints
~~~~~~~~~~~~~~~~~~~~~~~~
Orientational, or Boresch-style, restraints are automatically (unless manually specified) applied between three protein and three ligand atoms using one bond,
two angle, and three dihedral restraints. Reference atoms are picked based on different criteria, such as the root mean squared
fluctuation of the atoms in a short MD simulation, the secondary structure of the protein, and the distance between atoms,
based on heuristics from Baumann et al. [2]_ and Alibay et al. [3]_.
Two strategies for selecting protein atoms are available, either picking atoms that are bonded to each other or that can span multiple residues.
This can be specified using the ``restraint_settings.anchor_finding_strategy`` settings.
Partial annihilation scheme
~~~~~~~~~~~~~~~~~~~~~~~~~~~
In the :class:`SepTopProtocol <.SepTopProtocol>` the coulombic interactions of the molecules are fully turned off (annihilated) in the respective non-interacting end states.
The Lennard-Jones interactions are instead decoupled, meaning the intermolecular interactions are turned off, keeping the intramolecular Lennard-Jones interactions.
The lambda schedule
~~~~~~~~~~~~~~~~~~~
Molecular interactions are modified along an alchemical path using a discrete set of lambda windows.
For the transformation of ligand A to ligand B in the binding site, the following steps are carried out, starting with ligand A being fully interacting in the binding site while ligand B is decoupled.
1. Restrain the non-interacting dummy ligand B in the binding site. The contribution of the restraints is calculated analytically.
2. Turn on the van der Waals (vdW) interactions of ligand B while also turning on orientational restraints on ligand A.
3. Turn on the electrostatic interactions of ligand B while at the same time turning off the electrostatics of ligand A.
4. Turn off vdW interactions of ligand A while simultaneously releasing restraints on ligand B.
5. Release the restraints of the now dummy ligand A analytically.
The lambda schedule in the solvent phase is similar to the one in the complex, except that a single harmonic distance restraint is
applied between the respective central atom in the two ligands to keep the ligands apart while doing the alchemical transformation.
A soft-core potential from Beutler et al. [4]_ is applied to the Lennard-Jones potential to avoid instabilities in intermediate lambda windows.
The lambda schedule is defined in the ``lambda_settings`` objects ``lambda_elec_A``, ``lambda_elec_B``, ``lambda_vdw_A``, ``lambda_vdw_B``,
``lambda_restraints_A``, and ``lambda_restraints_B``.
Simulation overview
~~~~~~~~~~~~~~~~~~~
The :class:`.ProtocolDAG` of the :class:`SepTopProtocol <.SepTopProtocol>` contains :class:`.ProtocolUnit`\ s from both
the complex and solvent transformations.
This means that both legs of the thermodynamic cycle are constructed and run sequentially in the same
:class:`.ProtocolDAG`. This is different from the :class:`.RelativeHybridTopologyProtocol` where the
:class:`.ProtocolDAG` only runs a single leg of a thermodynamic cycle.
If multiple ``protocol_repeats`` are run (default: ``protocol_repeats=3``), the :class:`.ProtocolDAG`
contains multiple :class:`.ProtocolUnit`\ s of both complex and solvent transformations.
In that case, every :class:`.ProtocolUnit` would be run N times, where N is the number of ``protocol_repeats``. This also means that the
selection of the atoms for restraints would be performed multiple times.
Simulation steps
""""""""""""""""
Each :class:`.ProtocolUnit` (whether complex or solvent) carries out the following steps:
1. Parameterize the system using `OpenMMForceFields `_ and `Open Force Field `_.
2. Equilibrate the fully interacting system using a short MD simulation using the same approach as the :class:`.PlainMDProtocol` (in the solvent leg this will include rounds of NVT and NPT equilibration).
3. Add restraints to the system: Orientational restraints in the complex, a single harmonic distance restraint in the solvent leg.
4. Create an alchemical system.
5. Minimize the alchemical system.
6. Equilibrate and production simulate the alchemical system using the chosen multistate sampling method (under NPT conditions).
7. Analyze results for the transformation.
.. note:: Three different types of multistate sampling (i.e. replica swapping between lambda states) methods can be chosen: HREX, SAMS, and independent (no lambda swaps attempted).
By default the HREX approach is selected; this can be altered using ``solvent_simulation_settings.sampler_method`` or ``complex_simulation_settings.sampler_method`` (default: ``repex``).
Simulation details
""""""""""""""""""
Here are some details of how the simulation is carried out which are not detailed in the :class:`SepTopProtocol <.SepTopProtocol>`:
* The protocol applies a `LangevinMiddleIntegrator `_ which uses Langevin dynamics, with the LFMiddle discretization [5]_.
* A `Monte Carlo barostat `_ is used in the NPT ensemble to maintain constant pressure.
Getting the free energy estimate
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The free energy differences are obtained from simulation data using the `MBAR estimator `_ (multistate Bennett acceptance ratio estimator) as implemented in the `PyMBAR package `_.
Both the MBAR estimates of the two legs of the thermodynamic cycle, and the overall relative binding free energy (of the entire cycle) are obtained,
which is different compared to the results in the :class:`.RelativeHybridTopologyProtocol` where results from two legs of the thermodynamic cycle are obtained separately.
In addition to the estimates of the free energy changes and their uncertainty, the protocol also returns some metrics to help assess convergence of the results, these are detailed in the :ref:`multistate analysis section `.
See Also
--------
**Tutorials**
* :any:`Separated Topologies Free Energies tutorial <../../tutorials/septop_tutorial>`
**Cookbooks**
:ref:`Cookbooks `
**API Documentation**
* :ref:`OpenMM Protocol Settings `
References
----------
* `pymbar `_
* `yank `_
* `OpenMMTools `_
* `OpenMM `_
.. [1] Separated topologies--a method for relative binding free energy calculations using orientational restraints, G. Rocklin, D. Mobley, K. Dill; Chem Phys, 2013; 138(8):085104. doi: 10.1063/1.4792251.
.. [2] Broadening the Scope of Binding Free Energy Calculations Using a Separated Topologies Approach, H. Baumann, E. Dybeck, C. McClendon, F. Pickard IV, V. Gapsys, L. Pérez-Benito, D. Hahn, G. Tresadern, A. Mathiowetz, D. Mobley, J. Chem. Theory Comput., 2023, 19, 15, 5058–5076
.. [3] Evaluating the use of absolute binding free energy in the fragment optimisation process, I. Alibay, A. Magarkar, D. Seeliger, P. Biggin, Commun Chem 5, 105 (2022)
.. [4] Avoiding singularities and numerical instabilities in free energy calculations based on molecular simulations, T.C. Beutler, A.E. Mark, R.C. van Schaik, P.R. Greber, and W.F. van Gunsteren, Chem. Phys. Lett., 222 529–539 (1994)
.. [5] Unified Efficient Thermostat Scheme for the Canonical Ensemble with Holonomic or Isokinetic Constraints via Molecular Dynamics, Zhijun Zhang, Xinzijian Liu, Kangyu Yan, Mark E. Tuckerman, and Jian Liu, J. Phys. Chem. A 2019, 123, 28, 6056-6079
================================================
FILE: docs/guide/results/index.rst
================================================
.. _userguide_results:
Results Gathering
=================
With simulations completed,
the results of individual simulations can be inspected,
and entire networks of results analysed.
.. toctree::
working_with_results
working_with_networks
================================================
FILE: docs/guide/results/working_with_networks.rst
================================================
.. _userguide_result_networks:
Working with networks of results
================================
After running a **network** of free energy calculations,
we often want to analyse the corresponding network of results.
.. _userguide_MLE:
Converting relative results to absolute estimates
-------------------------------------------------
When a network of relative free energies has been calculated,
a commonly performed task is to transform these pairwise estimations of **relative** free energy differences (:math:`\Delta \Delta G`)
into **absolute** free energy differences (:math:`\Delta G`).
This is done using a maximum likelihood estimator (MLE) [1]_,
as implemented in the `cinnabar`_ package.
This approach uses the matrix of relative pairwise measurements and their uncertainties,
to estimate the overall ranking of ligands.
To use this approach the network of pairwise measurements needs to be fully connected,
i.e. there should be a way to trace a path along pairwise measurements between any two nodes (ligands) on the network.
.. note::
The results of an MLE estimation will have a **mean** of 0.0,
meaning that there will be some estimates with positive values and some estimates with negative values.
These predictions (:math:`\Delta G_{pred}`) can be shifted to match the magnitude of the experimental data,
to satisfy the below equation where the sum is performed over N molecules that have experimental data (:math:`\Delta G_{exp}`) [2]_.
.. math::
\sum_i^N \Delta G^i_{exp} = \sum_i^N \Delta G^i_{pred}
Gathering using the command line
--------------------------------
After running calculations using the :ref:`quickrun command `,
the :ref:`openfe gather ` command offers a way to collate information across many different individual
simulations and prepare a table of results.
The tool offers a summary of the relative binding affinities (``--report ddg``),
or their :ref:`corresponding MLE values ` (``--report dg``).
Using cinnabar directly
-----------------------
The `cinnabar`_ package can be used from within Python to manipulate networks of free energy estimates.
A tutorial on using this is provided :ref:`here `.
See also
--------
For handling the results of a single calculation, please consult :ref:`userguide_individual_results`
.. [1] Optimal Measurement Network of Pairwise Differences, Huafeng Xu, J. Chem. Inf. Model. 2019, 59, 11, 4720-4728
.. [2] Accurate and Reliable Prediction of Relative Ligand Binding Potency in Prospective Drug Discovery by Way of a Modern Free-Energy Calculation Protocol and Force Field
Lingle Wang, Yujie Wu, Yuqing Deng, Byungchan Kim, Levi Pierce, Goran Krilov, Dmitry Lupyan, Shaughnessy Robinson, Markus K. Dahlgren, Jeremy Greenwood, Donna L. Romero, Craig Masse, Jennifer L. Knight, Thomas Steinbrecher, Thijs Beuming, Wolfgang Damm, Ed Harder, Woody Sherman, Mark Brewer, Ron Wester, Mark Murcko, Leah Frye, Ramy Farid, Teng Lin, David L. Mobley, William L. Jorgensen, Bruce J. Berne, Richard A. Friesner, and Robert Abel
Journal of the American Chemical Society 2015 137 (7), 2695-2703 DOI: 10.1021/ja512751q
.. _cinnabar: https://github.com/OpenFreeEnergy/cinnabar
================================================
FILE: docs/guide/results/working_with_results.rst
================================================
.. _userguide_individual_results:
Working with individual results
===============================
With :ref:`execution of your calculations ` completed,
we can now start looking at what has been produced.
The majority of Protocols will produce estimates of free energy differences between two or more ``ChemicalSystem``\s
(the current exception being the :class:`.PlainMDProtocol` which just simulates the dynamics of a single system).
Beyond this, the exact data produced by a given Protocol can vary significantly,
for example the :class:`.RelativeHybridTopologyProtocol` protocol will produce graphs to assess the quality of the simulation, alongside trajectory data files.
By comparison, the :class:`.PlainMDProtocol` will only produce the latter.
For exact details on what is produced consult the :ref:`pages for each Protocol`.
.. todo crossref to HREX and MD Protocol docs from issue 743
How you can inspect these results depends on whether you have executed your simulations
from the command line or a Python script.
From command line execution
---------------------------
If you had executed your calculation using the :ref:`quickrun ` command,
then a ``.json`` results log file as well as a directory of files will have been produced.
This directory will have various plots and results of analysis, the exact details of which are described
in the :ref:`pages for each Protocol`.
Most importantly, the ``.json`` results file has ``estimate`` and ``uncertainty`` keys,
which serve the same purpose as the ``get_estimate()`` and ``get_uncertainty()`` methods described below.
The full ``json`` results file can be reloaded into a Python session as::
>>> import gufe
>>> import json
>>>
>>> with open('././Transformation-97d7223f918bbdb0570edc2a49bbc43e_results.json', 'r') as f:
... results = json.load(f, cls=gufe.tokenization.JSON_HANDLER.decoder)
>>> results['estimate']
-19.889719513104342
>>> results['uncertainty']
0.574685524681712
From Python execution
---------------------
Executing a :class:`.ProtocolDAG` using :func:`openfe.execute_DAG` will produce a :class:`.ProtocolDAGResult`,
representing a single iteration of estimating the free energy difference.
One or more of these can be put into the ``.gather()`` method of the ``Protocol`` to form a :class:`.ProtocolResult`,
this class takes care of the averaging and concatenation of different iterations of the estimation process.
This ``ProtocolResult`` class has ``.get_estimate()`` and ``.get_uncertainty()`` methods which return the estimates
of free energy difference along with its uncertainty.
See Also
--------
For how to deal with multiple results forming a network consult the :ref:`working with networks`
page.
================================================
FILE: docs/guide/setup/alchemical_network_model.rst
================================================
.. _alchemical_network_model:
Alchemical Networks: Planning a Simulation Campaign
===================================================
The ultimate goal of the setup stage is to create an :class:`.AlchemicalNetwork`,
which contains all the information needed for a campaign of simulations, including the
``openfe`` objects that define the chemical systems and alchemical transformations.
.. TODO provide a written or image based comparison between alchemical and thermodynamic cycles
Like any network, an :class:`.AlchemicalNetwork` can be described in terms
of nodes and edges between nodes. The nodes are :class:`.ChemicalSystem`\s,
which describe the specific molecules involved. The edges are
:class:`.Transformation` objects, which carry all the information about how
the simulation is to be performed.
.. figure:: img/AlchemicalNetwork.png
In practice, nodes must be associated with a transformation in order to be
relevant in an alchemical network; that is, there are no disconnected nodes.
This means that the alchemical network can be fully described by just the
edges (which contain information on the nodes they connect). Note that this
does not mean that the entire network must be fully connected -- just that
there are no solitary nodes.
Each :class:`.Transformation` represents everything that is needed to
calculate the free energy differences between the two
:class:`.ChemicalSystem`\ s that are the nodes for that edge. In addition to
containing the information for each :class:`.ChemicalSystem`, the
:class:`.Transformation` also contains a :class:`.Protocol` and, when
relevant, atom mapping information for alchemical transformations. The latter
is often done through a :class:`.LigandNetwork`.
.. _alchemical_network_creation:
3 Ways to Create an Alchemical Network
--------------------------------------
1. Python API
^^^^^^^^^^^^^
You can manually create an :class:`.AlchemicalNetwork` by creating a list
of :class:`.Transformation` objects. For examples using the Python API,
see :ref:`cookbook on creating alchemical networks `.
2. Python ``NetworkPlanner`` Convenience Classes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
OpenFE also provides the convenience classes :class:`.RBFEAlchemicalNetworkPlanner` and :class:`.RHFEAlchemicalNetworkPlanner`,
which use the :class:`.RelativeHybridTopologyProtocol` for creating :class:`.AlchemicalNetwork`\s.
For example usage of these convenience classes, see :ref:`Relative Alchemical Network Planners cookbook `.
.. note::
The Network Planners are provided for user convenience. While they cover
the majority of use cases, they may not currently offer the complete range
of options available through the Python API.
3. Command Line ``NetworkPlanner``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The Alchemical Network Planners can also be called directly from the
:ref:`command line interface `.
For example, you can create a Relative Hydration Free Energy (RHFE) network
using:
.. code:: bash
$ openfe plan-rhfe-network -M dir_with_sdfs/
or a Relative Binding Free Energy (RBFE) network using:
.. code:: bash
$ openfe plan-rbfe-network -p protein.pdb -M dir_with_sdfs/
For more CLI details, see :ref:`RBFE CLI tutorial ` and the :ref:`userguide_cli_interface`.
See Also
--------
* :ref:`Alchemical Network API reference `
* :ref:`Chemical Systems UserGuide entry `
================================================
FILE: docs/guide/setup/chemical_systems_and_thermodynamic_cycles.rst
================================================
.. _userguide_chemicalsystems_and_components:
Components, Chemical Systems and Thermodynamic Cycles
=====================================================
This page describes the core building blocks used to define simulation states in openfe:
:class:`.Component`\s, which describe what is physically present in a system;
:class:`.ChemicalSystem`\s, which combine components into a complete end state;
and thermodynamic cycles, which connect end states via alchemical transformations.
.. _userguide_components:
Components
----------
Components are the composable building blocks that define the chemical
composition of a simulated system. Splitting a system into components serves three purposes:
1. Alchemical transformations can be easily understood by comparing the differences in components.
2. Components can be reused to compose different systems.
3. :class:`.Protocol`\s can apply component-specific behaviour, e.g. different force fields per component.
Component types — overview
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. list-table::
:header-rows: 1
:widths: 25 30 45
* - Component
- Role
- Key notes
* - :class:`.ProteinComponent`
- Biological assembly
- Typically the contents of a PDB file. May include crystallographic waters and ions (defined as HETATM entries),
and disulfide bonds (defined via CONECT records).
* - :class:`.SmallMoleculeComponent`
- Ligands and cofactors
- Can optionally contain atomic partial charges. If present, those will be used in the simulation.
* - :class:`.SolventComponent`
- Abstract solvent definition
- Defines solvent conditions and ion concentration. Does **not** include coordinates or box vectors. Solvent is added by the protocol at runtime.
* - :class:`.SolvatedPDBComponent`
- Explicitly solvated system
- Includes atomic coordinates and box vectors. Solvent is already present,
the protocol does not add any further solvation.
* - :class:`.ProteinMembraneComponent`
- Protein-membrane complex
- Subclass of :class:`.SolvatedPDBComponent`. Includes protein, membrane, solvent,
and box vectors. Replaces :class:`.ProteinComponent` in membrane systems.
.. _userguide_solvation_models:
Abstract vs explicit solvation
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
These two approaches are **mutually exclusive**:
* **Abstract solvation** — use a :class:`.SolventComponent`. The protocol adds solvent
during system preparation.
* **Explicit solvation** — use a :class:`.SolvatedPDBComponent` or
:class:`.ProteinMembraneComponent`. Solvent molecule coordinates (waters and ions) are explicitly defined in the inputs.
Either define the solvent abstractly, or provide a fully solvated system — do not mix
both for the same leg of a transformation.
.. note::
Some protocols, such as :class:`.SepTopProtocol` and :class:`.AbsoluteBindingProtocol`,
use a single :class:`.ChemicalSystem` to represent both the complex and solvent legs.
In this case, a :class:`.ChemicalSystem` may contain both a :class:`.SolventComponent`
and a :class:`.ProteinMembraneComponent`. However, these apply to *different* legs: the
:class:`.SolventComponent` is used only for the solvent leg, and the
:class:`.ProteinMembraneComponent` (which is already explicitly solvated) is used only
for the complex leg. The mutual exclusivity rule still holds per leg.
Box vectors for explicitly solvated systems
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The components :class:`.SolvatedPDBComponent` and :class:`.ProteinMembraneComponent`
require periodic box vectors. These can be provided in three ways:
1. **CRYST1 record in the PDB file** — OpenMM reads box vectors automatically. No additional arguments are needed::
membrane_protein = openfe.ProteinMembraneComponent.from_pdb_file('./protein_membrane.pdb')
2. **Manual specification** — box vectors can be provided explicitly as numpy arrays with OpenFF units in OpenMM format via the ``box_vectors`` argument::
import numpy as np
import openff.units as offunit
box_vectors = np.array([
[6.9587, 0.0, 0.0],
[0.0, 5.9164, 0.0],
[0.0, 0.0, 9.2692]
]) * offunit.nanometer
membrane_protein = openfe.ProteinMembraneComponent.from_pdb_file(
'./protein_membrane.pdb', box_vectors=box_vectors
)
3. **Inference from atomic coordinates** — box vectors can be estimated from the atomic
positions by passing ``infer_box_vectors=True``::
membrane_protein = openfe.ProteinMembraneComponent.from_pdb_file(
'./protein_membrane.pdb', infer_box_vectors=True
)
.. warning::
Inferring box vectors from atomic positions can be inaccurate if the PDB originates
from a previous simulation where atoms may be distributed across periodic images.
.. _userguide_chemical_systems:
ChemicalSystem
--------------
A :class:`.ChemicalSystem` is composed of components that together describe a model of the system to be simulated.
It represents the **end state** of an alchemical transformation
and is the primary input a :class:`.Protocol` consumes to define a simulation state.
**What a ChemicalSystem defines**
* Exact atomic information (including protonation state) of protein, ligands,
cofactors, and any crystallographic waters.
* Atomic positions of all explicitly defined components such as ligands or proteins.
* The abstract or explicit definition of the solvent environment (SolventComponent).
**What a ChemicalSystem does NOT define** (these are instead handled by the Protocol):
Any simulation parameters including:
* Forcefield applied to any component, including water model or virtual particles.
* Thermodynamic conditions (e.g. temperature or pressure).
* These are handled by the :class:`.Protocol`.
.. _userguide_system_composition:
System composition examples
---------------------------
The components that make up each :class:`.ChemicalSystem` depend on the protocol and
the nature of the system. The table below summarises the composition for each combination.
.. note::
Protocol-specific behaviour:
For :class:`.SepTopProtocol` and :class:`.AbsoluteBindingProtocol`, a single
:class:`.ChemicalSystem` represents both legs of the thermodynamic cycle. The protocol
determines internally what is the complex leg and what is the solvent leg.
This differs from the :class:`.RelativeHybridTopologyProtocol`, where each leg (e.g. complex and solvent) is defined by
separate :class:`.ChemicalSystem`\s. This behaviour is expected to change in future versions.
.. list-table::
:header-rows: 1
:widths: 20 40 40
* - System
- :ref:`RBFE ` (:class:`.RelativeHybridTopologyProtocol`)
- :ref:`SepTop ` / :ref:`ABFE ` (:class:`.SepTopProtocol`, :class:`.AbsoluteBindingProtocol`)
* - **Standard protein–ligand**
- | **Complex leg:**
| :class:`.ProteinComponent` + :class:`.SmallMoleculeComponent`\s + :class:`.SolventComponent`
|
| **Solvent leg:**
| :class:`.SmallMoleculeComponent`\s + :class:`.SolventComponent`
- | **Single ChemicalSystem (both legs):**
| :class:`.ProteinComponent` + :class:`.SmallMoleculeComponent`\s + :class:`.SolventComponent`
* - **Membrane system**
- | **Complex leg:**
| :class:`.ProteinMembraneComponent` + :class:`.SmallMoleculeComponent`\s
| *(no* :class:`.SolventComponent` *— already explicitly solvated)*
|
| **Solvent leg:**
| :class:`.SmallMoleculeComponent`\s + :class:`.SolventComponent`
- | **Single ChemicalSystem (both legs):**
| :class:`.ProteinMembraneComponent` + :class:`.SmallMoleculeComponent`\s + :class:`.SolventComponent`
| *(protocol applies* :class:`.SolventComponent` *only in the solvent leg)*
Thermodynamic Cycles
--------------------
A thermodynamic cycle can be described as a set of :class:`.ChemicalSystem`\s (nodes) connected by
alchemical transformations (edges). The :class:`.Protocol` defines how the
:class:`.ChemicalSystem`\s map onto the cycle and how they are used in practice.
The same :class:`.ChemicalSystem` can be reused across multiple thermodynamic states
depending on the protocol. For details of which end states to construct, consult the
:ref:`pages for each specific Protocol `.
Hybrid topology RBFE example
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
As an example, the relative binding free energy cycle requires four
:class:`.ChemicalSystem`\s — one for each node in the cycle:
.. figure:: ../protocols/img/rbfe_thermocycle.png
:scale: 40%
:alt: RBFE thermodynamic cycle
Illustration of the relative binding free energy thermodynamic cycles and the chemical systems at each end state.
::
import openfe
# two small molecules defined in a molfile format
ligand_A = openfe.SmallMoleculeComponent.from_sdf_file('./ligand_A.sdf')
ligand_B = openfe.SmallMoleculeComponent.from_sdf_file('./ligand_B.sdf')
# a complete biological assembly
protein = openfe.ProteinComponent.from_pdb_file('./protein.pdb')
# defines an aqueous solvent environment, with a concentration of ions
solvent = openfe.SolventComponent(smiles='O')
# ligand_A + protein + solvent
ligand_A_complex = openfe.ChemicalSystem(components={'ligand': ligand_A, 'protein': protein, 'solvent': solvent})
# ligand_B + protein + solvent
ligand_B_complex = openfe.ChemicalSystem(components={'ligand': ligand_B, 'protein': protein, 'solvent': solvent})
# ligand_A + solvent
ligand_A_solvent = openfe.ChemicalSystem(components={'ligand': ligand_A, 'solvent': solvent})
# ligand_B + solvent
ligand_B_solvent = openfe.ChemicalSystem(components={'ligand': ligand_B, 'solvent': solvent})
Explicitly solvated variant
~~~~~~~~~~~~~~~~~~~~~~~~~~~
When using a :class:`.SolvatedPDBComponent` or :class:`.ProteinMembraneComponent`, it replaces both the :class:`.ProteinComponent`
and the :class:`.SolventComponent` in the complex leg. No separate :class:`.SolventComponent`
is required:
::
# explicitly solvated protein-membrane complex (box vectors read from CRYST1 record)
protein_membrane = openfe.ProteinMembraneComponent.from_pdb_file('./protein_membrane.pdb')
# ligand_A + explicitly solvated protein-membrane — no SolventComponent needed
ligand_A_complex = openfe.ChemicalSystem(components={'ligand': ligand_A, 'protein_membrane': protein_membrane})
See Also
--------
* To see how to construct a :class:`.ChemicalSystem` from your files, see :ref:`the cookbook entry on loading molecules `
* For details of which thermodynamic cycles to construct, consult the :ref:`pages for each specific Protocol `
================================================
FILE: docs/guide/setup/creating_atom_mappings_and_scores.rst
================================================
.. _userguide_mappings:
.. _Creating Atom Mappings:
Creating Atom Mappings
======================
``Atom Mapping`` objects are used to define the relationship between
:ref:`components ` from different :class:`.ChemicalSystem`\s.
This guide will show how ``Atom Mappings`` can describe the transformation between a pair of ligands.
Generating Mappings
-------------------
The :class:`.LigandAtomMapper` takes pairs of :class:`openfe.SmallMoleculeComponent`\s and returns zero
(in the case that no mapping can be found) or more possible mappings.
Built in to the ``openfe`` package are bindings to the `Lomap `_ package,
including the :class:`.openfe.setup.LomapAtomMapper`, which uses an MCS approach based on RDKit.
.. TODO: insert example output
This is how we can create a mapping between two ligands:
.. code::
import openfe
from openfe import setup
# as previously detailed, load a pair of ligands
m1 = openfe.SmallMoleculeComponent(...)
m2 = openfe.SmallMoleculeComponent(...)
# first create an atom mapper
mapper = setup.LomapAtomMapper(threed=True)
# this returns an iterable of possible mappings
mapping_gen = mapper.suggest_mappings(m1, m2)
# extract all possible mappings into a list
mappings = list(mapping_gen)
# Lomap always produces a single Mapping, so extract it from the list
mapping = mappings[0]
The two molecules passed into the ``suggest_mappings()`` method are then referred to
as ``componentA`` and ``componentB`` (in the above example, ``m1`` is ``componentA`` and ``m2`` is ``componentB``).
The atom mapping can be accessed through the ``componentA_to_componentB`` attribute, which returns a dictionary
where keys refer to the indices of atoms in the "A" component, and values refer to indices of atoms in the "B" component.
If a given index does not appear, then it is unmapped.
.. note::
Like the Component objects, a Mapping object is immutable once created!
Visualising Mappings
--------------------
In an interactive notebook we can view a 2D representation of the mapping.
In this view,
atoms that are deleted are coloured red, while atoms that undergo an elemental transformation are coloured blue.
Similarly, bonds that are deleted are coloured red,
while bonds that change (either bond order change or element change), are coloured blue.
.. image:: img/2d_mapping.png
:width: 90%
:align: center
:alt: Sample output of 2d mapping visualisation
These 2D mappings can be saved to file using the :func:`LigandAtomMapping.draw_to_file()` method.
With the ``py3dmol`` package, we can inspect the spatial overlap of the mapping in 3D.
In a notebook, this produces an interactive rotatable view of the mapping.
The left and rightmost views show the "A" and "B" molecules
with coloured spheres on each showing the correspondence between atoms.
The centre view shows both molecules overlaid, allowing the spatial correspondence to be directly viewed.
.. code::
from openfe.utils import visualization_3D
view = mapping.view_3d()
.. image:: img/3d_mapping.png
:width: 90%
:align: center
:alt: Sample output of view_3d() function
The Cartesian distances between the pairs of mapped atoms are also available via the :meth:`.get_distances()` method.
This returns a numpy array.
.. code::
mapping.get_distances()
.. _Scoring Atom Mappings:
Scoring Mappings
----------------
Mapping **scorers**, or "scoring functions", evaluate the quality of an atom mapping and
can be used as objective functions for optimizing ligand networks.
**Scorers** take a :class:`.LigandAtomMapping` object and return a value from 0.0 (indicating a terrible mapping)
to 1.0 (indicating a great mapping).
Because **scorers** are normalized, it is possible to use multiple **scorers** together.
For example, the built-in Lomap scorer :func:`default_lomap_score` combines several criteria
(such as the number of heavy atoms, if certain chemical changes are present,
and if ring sizes are being mutated), into a single value.
It is possible to combine scoring functions in this way because each scoring function returns a normalized value.
.. code::
from openfe.setup import lomap_scorers
mapping = next(mapper.suggest_mappings(m1, m2))
score = lomap_scorers.default_lomap_score(mapping)
================================================
FILE: docs/guide/setup/creating_ligand_networks.rst
================================================
.. _userguide_ligand_network:
Defining the Ligand Network
===========================
A :class:`.LigandNetwork` is a network where nodes are :class:`.SmallMoleculeComponent`\ s and edges are :class:`.LigandAtomMapping`\ s.
For example, a :class:`.LigandNetwork` with drug candidates as nodes can be used to conduct a free energy campaign and compute ligand rankings.
**openfe** includes an interface to common :any:`Ligand Network Planners`, which are implemented in OpenFE's `konnektor `_ package.
(See `konnektor's documentation `_ for more information on network generators.)
A :class:`.LigandNetwork` is constructed from :class:`.SmallMoleculeComponent`\ s, which represent the nodes, and optionally :class:`.LigandAtomMapping`\ s, which represent the edges of the network.
A :class:`.LigandAtomMapping` can have a :ref:`score associated with the mapping <Scoring Atom Mappings>` which can be used by some network generators to construct more efficient network topologies.
Below is an example of a ``LigandNetwork`` with scores assigned to each atom mapping:
.. image:: img/ligand_network.png
:width: 80%
:align: center
:alt: Concept of a simple MST ligand network
Generating Ligand Networks
--------------------------
:class:`.LigandNetwork` generation can typically be described by three steps:
1. Generate the :ref:`Atom Mappings` of all pairwise combinations of :class:`.SmallMoleculeComponent`\ s
2. :ref:`Calculate scores` for each :class:`.LigandAtomMapping`
3. Build a :class:`.LigandNetwork` with all possible mappings directed by their scores.
.. code:: python
import openfe
from openfe import setup
# load a set of ligands
mols = [openfe.SmallMoleculeComponent.from_rdkit(x) for x in rdmols]
# generate the required mapper, scorer, and planner objects
mapper = setup.KartografAtomMapper()
scorer = setup.lomap_scorers.default_lomap_score
network_planner = setup.ligand_network_planning.generate_minimal_spanning_network
# plan the ligand network
ligand_network = network_planner(ligands=mols, mappers=[mapper], scorer=scorer)
Practical information on generating ligand networks can be found in our :ref:`cookbook for ligand network generation `.
.. note::
Like the Component objects, a ``LigandNetwork`` object is immutable once created!
================================================
FILE: docs/guide/setup/defining_protocols.rst
================================================
.. _defining-protocols:
Protocols in OpenFE
============================
A :class:`.Protocol` is a computational method for estimating the free energy difference between two chemical systems.
Just as there are multiple possible methods for estimating free energy differences,
there are multiple available ``Protocol``\s to choose from.
For example, included in the ``openfe`` package are the following:
* :class:`.RelativeHybridTopologyProtocol`
* :class:`.AbsoluteBindingProtocol`
* :class:`.SepTopProtocol`
* :class:`.AbsoluteSolvationProtocol`
* :class:`.PlainMDProtocol`
More protocols are in development, and a full list of available protocols
can be found at :ref:`userguide_protocols`.
Because :class:`.Protocol`\s share a common interface for how they are created and executed,
it is relatively straightforward to try out a new method,
or benchmark several to choose the best for a particular project.
Defining Settings and Creating Protocols
----------------------------------------
A ``Settings`` object contains all the parameters needed by a ``Protocol``.
Each ``Protocol`` has a ``.default_settings()`` method, which will provide a sensible default
starting point and relevant documentation.
.. TODO: print what a settings object looks like, or how you might define custom settings
For example, to create an instance of the OpenMM RFE Protocol with default settings::
from openfe.protocols import openmm_rfe
settings = openmm_rfe.RelativeHybridTopologyProtocol.default_settings()
protocol = openmm_rfe.RelativeHybridTopologyProtocol(settings)
``Protocol`` objects **cannot be modified once created**. This is crucial for data provenance.
Therefore, the ``Settings`` objects must be customised *before* the ``Protocol`` object is created.
For example, to customise the production run length of the RFE Protocol::
from openfe.protocols import openmm_rfe
settings = openmm_rfe.RelativeHybridTopologyProtocol.default_settings()
settings.simulation_settings.production_length = '10 ns'
protocol = openmm_rfe.RelativeHybridTopologyProtocol(settings)
Adaptive Settings
~~~~~~~~~~~~~~~~~
.. warning::
The ``_adaptive_settings()`` method is experimental and subject to change.
In addition to the ``.default_settings()`` method, some protocols
provide an ``_adaptive_settings`` method. This method generates recommended settings
based on properties of the input :class:`.ChemicalSystem`\s and, where required, the :class:`.AtomMapping`.
For example::
from openfe.protocols import openmm_rfe
settings = openmm_rfe.RelativeHybridTopologyProtocol._adaptive_settings(
stateA=stateA,
stateB=stateB,
mapping=mapping,
)
protocol = openmm_rfe.RelativeHybridTopologyProtocol(settings)
The adaptive settings may modify parameters based on properties of the input systems.
For example (:class:`.RelativeHybridTopologyProtocol`):
* Transformations involving a change in net charge use a larger number of lambda windows and longer production simulations.
* If both states contain a :class:`.ProteinComponent`, the solvation padding is set to 1 nm.
Optionally, you can pass a preexisting settings object to the ``_adaptive_settings`` method via the ``initial_settings`` argument. If provided, an adapted copy of these settings will be returned instead
of using the default settings.
In systems containing membrane-protein complexes (i.e. using a
:class:`.ProteinMembraneComponent`), adaptive settings select a membrane-appropriate barostat, the ``MonteCarloMembraneBarostat``.
Creating Transformations from Protocols
-----------------------------------------
With only ``settings`` defined, a ``Protocol`` contains no chemistry-specific information.
This means that a single ``Protocol`` object can be applied to multiple pairs of ``ChemicalSystem`` objects
to measure each free energy difference.
The :class:`.Transformation` class connects two ``ChemicalSystem`` objects with a ``Protocol``, and
often an :ref:`AtomMapping <userguide_mappings>` (depending on the system).
A ``Transformation`` object is then capable of creating computational work via the :func:`.Transformation.create()` method.
For further details on this, refer to the :ref:`userguide_execution` section.
Finally, a ``Protocol`` is responsible for using the data generated in this process to perform further analysis,
such as generating an estimate of the free energy difference.
For further details on this refer to the :ref:`userguide_results` section,
or the details of each method in :ref:`userguide_protocols`.
================================================
FILE: docs/guide/setup/index.rst
================================================
.. _userguide_setup:
Simulation Setup
================
This section provides details on how to set up a free energy calculation or an MD simulation.
All protocols in OpenFE follow the same general structure:
* Reading in input structures and creating :class:`.ChemicalSystem` \s
* Defining the :class:`.Protocol` with specific `ProtocolSettings`.
* Creating :class:`.LigandAtomMapping` \s for relative free energy calculation `Protocols`.
.. image:: img/setup_1x.png
:width: 70%
:align: center
:alt: Concept of ChemicalSystems and Transformations
The image below demonstrates how, for relative free energy calculations, you plan a
network of ligand transformations starting from input SDF / MOL2 / PDB files:
.. image:: img/setup_2x.png
:width: 70%
:align: center
:alt: Concept of a LigandNetwork and AlchemicalNetwork
The procedure for setting up a simulation depends on the
type of free energy calculation you are running. More detailed
instructions can be found in the following sections:
.. toctree::
:maxdepth: 1
chemical_systems_and_thermodynamic_cycles
creating_atom_mappings_and_scores
defining_protocols
creating_ligand_networks
alchemical_network_model
To set up your alchemical network using the Python interface, but run it using the CLI,
you will need to export the network in the same format used by the CLI.
See :ref:`dumping transformations ` for more details.
================================================
FILE: docs/guide/troubleshooting.rst
================================================
Troubleshooting Simulations
===========================
This guide covers tips and strategies for troubleshooting simulation failures.
Log Debug information
---------------------
.. note::
When using a scheduler (e.g. SLURM), be sure to specify output files for standard out and standard error.
For example, when using SLURM both ``--output=`` and ``--error=`` must be set to view errors.
One of the first troubleshooting steps is to increase the verbosity of the logging.
``openfe`` uses Python's native logging library which can be `configured `_ either using a Python API or a configuration file.
.. warning::
**We do not recommend setting the log level to debug for production runs,** as the logging may slow down the simulation and add a lot of noise to the output.
When using ``openfe quickrun``, the configuration file is more convenient.
Below is an example logging configuration file that can be used to set the log level to ``DEBUG``:
.. code-block:: ini
[loggers]
keys=root
[handlers]
keys=stdout
[formatters]
keys=standardFormatter,msgOnly
[handler_stdout]
class=StreamHandler
level=DEBUG
formatter=standardFormatter
args=(sys.stdout,)
[logger_root]
level=DEBUG
handlers=stdout
[formatter_standardFormatter]
format=%(asctime)s %(levelname)s %(name)s: %(message)s
[formatter_msgOnly]
format=%(message)s
Save this configuration file as ``debug_logging.conf`` and then run ``openfe quickrun`` with the ``--log`` flag, for example:
.. code-block:: bash
$ openfe --log debug_logging.conf quickrun -d results/ -o results/result_lig_ejm_31_solvent_lig_ejm_42_solvent.json transformations/rbfe_lig_ejm_31_solvent_lig_ejm_42_solvent.json
Note that the ``--log debug_logging.conf`` argument goes between ``openfe`` and ``quickrun`` on the command line.
This will cause every package to log at the debug level, which may be quite verbose and noisy but should aid in identifying what is going on right before the exception is thrown.
JAX warnings
------------
We use ``pymbar`` to analyze the free energy of the system.
``pymbar`` uses JAX to accelerate computation.
The JAX library can utilize a GPU to further accelerate computation.
If the necessary libraries for GPU acceleration are not installed and JAX detects a GPU, JAX will print a warning like this:
.. code-block:: bash
WARNING:2025-06-10 09:01:40,857:jax._src.xla_bridge:966: An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.
This warning does not mean that the *molecular dynamics* simulation will fall back to using the CPU.
The simulation will still use the computing platform specified in the settings.
PYMBAR_DISABLE_JAX
------------------
Due to a suspected memory leak in the JAX acceleration code in ``pymbar`` we disable JAX acceleration by default.
This memory leak may result in the simulation crashing, wasting compute time.
The error message may look like this:
.. code-block:: bash
LLVM compilation error: Cannot allocate memory
LLVM ERROR: Unable to allocate section memory!
We have decided to disable JAX acceleration by default to prevent wasted compute.
However, if you wish to use the JAX acceleration, you may set ``PYMBAR_DISABLE_JAX`` to ``FALSE`` (e.g. put ``export PYMBAR_DISABLE_JAX=FALSE`` in your submission script before running ``openfe quickrun``).
For more information, see these issues on github:
- https://github.com/choderalab/pymbar/issues/564
- https://github.com/OpenFreeEnergy/openfe/issues/1534
- https://github.com/OpenFreeEnergy/openfe/issues/1654
================================================
FILE: docs/guide/under_the_hood.rst
================================================
.. _under-the-hood:
Under the Hood
==============
.. module:: openfe
:noindex:
If you want to implement your own atom mapper or free energy procedure, or you want to do something a bit more bespoke, it's helpful to understand how OpenFE thinks about individual alchemic mutation specifications. A :class:`Transformation` stores all the information needed to run an alchemic mutation from one chemical system to another and is the basic unit of an OpenFE simulation campaign. Indeed, :class:`Transformation` objects describe the edges of the graph in the :class:`AlchemicalNetwork` class.
.. container:: deflist-flowchart
* Setup
- .. container:: flowchart-sidebyside
- -
.. rst-class:: flowchart-spacer
-
- :class:`Protocol`
Simulation procedure for an alchemic mutation.
.. rst-class:: arrow-down arrow-tail arrow-combine-right
-
- - Chemical component definition
SDF, PDB, RDKit, OpenFF Molecule, solvent spec, etc.
.. rst-class:: arrow-down arrow-tail
- :any:`Loading Molecules`
- .. container:: flowchart-sidebyside
- -
.. rst-class:: arrow-down arrow-head arrow-combine-right
-
- :class:`SmallMoleculeComponent`
The ligands that will be mutated.
.. rst-class:: arrow-down
- :any:`Creating Atom Mappings`
- :class:`LigandAtomMapping`
Corresponds atoms in one small molecule to those in another.
.. rst-class:: arrow-down arrow-tail arrow-combine
-
- -
.. rst-class:: arrow-down arrow-head arrow-combine-left
-
- :class:`SmallMoleculeComponent`, :class:`SolventComponent` and :class:`ProteinComponent`
The components that make up the chemical system.
.. rst-class:: arrow-down arrow-multiple-combine
- :any:`Assembling into ChemicalSystems`
- :class:`ChemicalSystem`
Each of the chemical systems, composed of components, that the :class:`Transformation` mutates between.
.. rst-class:: arrow-down arrow-tail arrow-combine-left arrow-multiple
-
.. rst-class:: arrow-down arrow-head
-
- :class:`Transformation`
A single alchemic mutation from one chemical system to another.
.. rst-class:: arrow-down
*
* Run
- :class:`Transformation`
A single alchemic mutation from one chemical system to another.
.. rst-class:: arrow-down
-
- :class:`ProtocolDAG`
A directed acyclic graph describing how to compute a :class:`Transformation`.
- .. container:: flowchart-sidebyside
-
-
.. rst-class:: arrow-down arrow-multiple
-
- :class:`ProtocolUnit`
A single unit of computation within a :class:`ProtocolDAG`
.. rst-class:: arrow-down
-
- :class:`ProtocolUnitResult`
The result of a completed :class:`ProtocolUnit`
.. rst-class:: arrow-down arrow-multiple-combine
-
-
-
.. rst-class:: arrow-down
- :any:`executors`
- :class:`ProtocolDAGResult`
A completed transformation.
.. rst-class:: arrow-down
*
* Gather
- .. container:: flowchart-sidebyside
-
- :class:`Transformation`
The specification for the alchemic mutation.
.. rst-class:: arrow-down
-
- :class:`Protocol`
Simulation procedure for an alchemic mutation.
.. rst-class:: arrow-down arrow-combine-right arrow-tail
-
-
- :class:`ProtocolDAGResult`
A completed single run of a transformation.
.. rst-class:: arrow-down arrow-combine-left arrow-multiple arrow-tail
-
.. rst-class:: arrow-down arrow-head
-
- :class:`ProtocolResult`
A completed transformation with multiple user-defined replicas.
================================================
FILE: docs/index.rst
================================================
.. template taken from SciPy who took it from Pandas (keep the chain going)
.. module:: openfe
=====================================
Welcome to OpenFE's documentation!
=====================================
The **OpenFE** toolkit provides a free and open-source framework for alchemical free energy calculations.
Using this toolkit you can plan, execute, and analyze free energy calculations using a variety of methods.
**Useful Links**:
`OpenFE Website `__ |
`Example Tutorial notebooks `__ |
`Source Repository `__ |
`Issues & Ideas `__
.. grid:: 1 2 2 4
:gutter: 3
.. grid-item-card:: :fas:`download` Install openfe
:text-align: center
:link: installation
:link-type: doc
Follow our installation guide to get **openfe** running on your machine!
.. grid-item-card:: :fas:`laptop-code` CLI Quickstart
:text-align: center
:link: tutorials/rbfe_cli_tutorial
:link-type: doc
Get started with **openfe**\'s command line interface.
.. grid-item-card:: :fas:`person-chalkboard` Tutorials
:text-align: center
:link: tutorials/index
:link-type: doc
Step-by-step examples showing how to use the OpenFE toolkit.
.. grid-item-card:: :fas:`book-open-reader` User Guide
:text-align: center
:link: guide/index
:link-type: doc
Explanations of key concepts underlying the OpenFE toolkit.
.. grid-item-card:: :fas:`table-list` Cookbooks
:text-align: center
:link: cookbook/index
:link-type: doc
How-to guides for common tasks.
.. grid-item-card:: :fas:`code` API Reference
:text-align: center
:link: reference/index
:link-type: doc
Comprehensive details of the **openfe** Python and CLI APIs.
.. grid-item-card:: :fas:`gears` Protocols
:text-align: center
:link: guide/protocols/index
:link-type: doc
Details of the specific Free Energy Protocols included in **openfe**.
.. grid-item-card:: :fas:`clock-rotate-left` Changelog
:text-align: center
:link: CHANGELOG
:link-type: doc
A history of **openfe** releases.
.. toctree::
:maxdepth: 2
:hidden:
installation
tutorials/index
guide/index
cookbook/index
reference/index
CHANGELOG
Other OpenFE Ecosystem Projects:
--------------------------------
**openfe** is Open Free Energy's user-facing software for performing alchemical free energy calculations.
Below are other software projects the Open Free Energy team maintains, many of which are used by **openfe** itself.
* `konnektor `_: free energy network planning, modification, and analysis
* `kartograf `_: atom mappings focusing on 3D geometries
* `Lomap `_: planning perturbation networks for free energy calculations
* `cinnabar `_ (formerly arsenic): plotting free energy calculation results
* `gufe `_ : data structures and models underlying the OpenFE ecosystem
Community-Developed Projects:
-----------------------------
* `alchemiscale `_: high-throughput alchemical free energy execution, developed by `Datryllic `_.
================================================
FILE: docs/installation.rst
================================================
Installation
============
**openfe** is currently only compatible with POSIX systems (macOS and UNIX/Linux).
See `Supported Hardware`_ for more details.
We try to follow `SPEC0 `_ as far as minimum supported dependencies, with the following caveats:
- OpenMM 8.0, 8.1.2, 8.2, and 8.4 - **OpenMM v8.3.0 is not supported**
When you install **openfe** through any of the methods described below, you will install both the core library and the command line interface (CLI).
Installation with ``micromamba`` (recommended)
----------------------------------------------
OpenFE recommends ``micromamba`` as a package manager for most users, as it is a lightweight version of ``mamba``, which is a much faster drop-in replacement for ``conda``.
If you prefer to use ``mamba`` or ``conda`` instead of ``micromamba`` because of their additional functionality, we suggest following our `Miniforge Installation Guide`_.
In the instructions below, we will use the ``micromamba`` command, but you can use ``conda`` or ``mamba`` in the same way.
Once you have one of `micromamba `_, `mamba `_, or `conda `_ installed, you can continue to the **openfe** installation instructions below.
.. note::
After installing, you must run ``micromamba activate openfe`` in each shell session where you want to use **openfe**!
Reproducible builds with a ``conda-lock`` file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. _conda-lock: https://github.com/conda/conda-lock?tab=readme-ov-file#conda-lock
We recommend building from **openfe**'s ``conda-lock`` file in most cases, since it allows for building packages in a reproducible way on multiple platforms.
Unlike the single file installer, an internet connection is required to install from a ``conda-lock`` file.
The ``conda-lock`` files for the latest version of **openfe** can be downloaded with ::
$ curl -LOJ https://github.com/OpenFreeEnergy/openfe/releases/latest/download/openfe-conda-lock.yml
If a particular version is required, the URL will look like this (using the ``openfe 1.6.1`` release as an example) ::
$ curl -LOJ https://github.com/OpenFreeEnergy/openfe/releases/download/v1.6.1/openfe-1.6.1-conda-lock.yml
``micromamba`` supports ``conda-lock`` files and can be used directly to create a virtual environment ::
$ micromamba create -n openfe --file openfe-conda-lock.yml
$ micromamba activate openfe
.. note::
If you are having trouble building from the conda-lock file, you may need to build directly with ``conda-lock``.
We recommend installing ``conda-lock`` in a new virtual environment.
This will reduce the chance of dependency conflicts ::
$ # Install conda lock into a virtual environment
$ micromamba create -n conda-lock conda-lock
$ # Activate the environment to use the conda-lock command
$ micromamba activate conda-lock
$ conda-lock install -n openfe openfe-conda-lock.yml
$ micromamba activate openfe
To make sure everything is working, :ref:`run the tests `.
With that, you should be ready to use **openfe**!
Standard Installation with ``micromamba``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
There may be some instances where you don't want to use a lock-file, e.g. you may want to specify a dependency that differs from the lock file.
In these cases, you can simply install **openfe** from conda-forge:
.. parsed-literal::
micromamba create -c conda-forge -n openfe openfe=\ |version|
micromamba activate openfe
Single file installer
---------------------
.. warning::
The single file installer may modify your ``.bashrc`` in a way that requires manual intervention to access your previous ``conda`` installation
.. _releases on GitHub: https://github.com/OpenFreeEnergy/openfe/releases
Single file installers are available for Linux (x86_64) and MacOS (arm64).
They are attached to our `releases on GitHub`_ and can be downloaded with a browser or ``curl`` (or similar tool).
For example, the Linux installer can be downloaded with ::
$ curl -LOJ https://github.com/OpenFreeEnergy/openfe/releases/latest/download/OpenFEforge-Linux-x86_64.sh
And the MacOS (arm64) installer ::
$ curl -LOJ https://github.com/OpenFreeEnergy/openfe/releases/latest/download/OpenFEforge-MacOSX-arm64.sh
MacOS x86_64 is no longer supported.
The single file installer contains all of the dependencies required for **openfe** and does not require internet access to use.
Both ``conda`` and ``mamba`` are also available in the environment created by the single file installer and can be used to install additional packages.
The installer can be run in batch mode or interactively ::
$ chmod +x ./OpenFEforge-Linux-x86_64.sh # Make installer executable
$ ./OpenFEforge-Linux-x86_64.sh # Run the installer
Example installer output is shown below (click to expand "Installer Output")
.. collapse:: Installer Output
.. code-block::
Welcome to OpenFEforge 0.7.4
In order to continue the installation process, please review the license
agreement.
Please, press ENTER to continue
>>>
MIT License
Copyright (c) 2022 OpenFreeEnergy
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Do you accept the license terms? [yes|no]
[no] >>> yes
.. note::
The install location will be different when you run the installer.
.. code-block::
OpenFEforge will now be installed into this location:
/home/mmh/openfeforge
- Press ENTER to confirm the location
- Press CTRL-C to abort the installation
- Or specify a different location below
[/home/mmh/openfeforge] >>>
PREFIX=/home/mmh/openfeforge
Unpacking payload ...
Installing base environment...
Downloading and Extracting Packages
Downloading and Extracting Packages
Preparing transaction: done
Executing transaction: \ By downloading and using the CUDA Toolkit conda packages, you accept the terms and conditions of the CUDA End User License Agreement (EULA): https://docs.nvidia.com/cuda/eula/index.html
| Enabling notebook extension jupyter-js-widgets/extension...
- Validating: OK
done
installation finished.
Do you wish the installer to initialize OpenFEforge
by running conda init? [yes|no]
[no] >>> yes
no change /home/mmh/openfeforge/condabin/conda
no change /home/mmh/openfeforge/bin/conda
no change /home/mmh/openfeforge/bin/conda-env
no change /home/mmh/openfeforge/bin/activate
no change /home/mmh/openfeforge/bin/deactivate
no change /home/mmh/openfeforge/etc/profile.d/conda.sh
no change /home/mmh/openfeforge/etc/fish/conf.d/conda.fish
no change /home/mmh/openfeforge/shell/condabin/Conda.psm1
no change /home/mmh/openfeforge/shell/condabin/conda-hook.ps1
no change /home/mmh/openfeforge/lib/python3.9/site-packages/xontrib/conda.xsh
no change /home/mmh/openfeforge/etc/profile.d/conda.csh
modified /home/mmh/.bashrc
==> For changes to take effect, close and re-open your current shell. <==
__ __ __ __
/ \ / \ / \ / \
/ \/ \/ \/ \
███████████████/ /██/ /██/ /██/ /████████████████████████
/ / \ / \ / \ / \ \____
/ / \_/ \_/ \_/ \ o \__,
/ _/ \_____/ `
|/
███╗ ███╗ █████╗ ███╗ ███╗██████╗ █████╗
████╗ ████║██╔══██╗████╗ ████║██╔══██╗██╔══██╗
██╔████╔██║███████║██╔████╔██║██████╔╝███████║
██║╚██╔╝██║██╔══██║██║╚██╔╝██║██╔══██╗██╔══██║
██║ ╚═╝ ██║██║ ██║██║ ╚═╝ ██║██████╔╝██║ ██║
╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚═════╝ ╚═╝ ╚═╝
mamba (1.4.2) supported by @QuantStack
GitHub: https://github.com/mamba-org/mamba
Twitter: https://twitter.com/QuantStack
█████████████████████████████████████████████████████████████
no change /home/mmh/openfeforge/condabin/conda
no change /home/mmh/openfeforge/bin/conda
no change /home/mmh/openfeforge/bin/conda-env
no change /home/mmh/openfeforge/bin/activate
no change /home/mmh/openfeforge/bin/deactivate
no change /home/mmh/openfeforge/etc/profile.d/conda.sh
no change /home/mmh/openfeforge/etc/fish/conf.d/conda.fish
no change /home/mmh/openfeforge/shell/condabin/Conda.psm1
no change /home/mmh/openfeforge/shell/condabin/conda-hook.ps1
no change /home/mmh/openfeforge/lib/python3.9/site-packages/xontrib/conda.xsh
no change /home/mmh/openfeforge/etc/profile.d/conda.csh
no change /home/mmh/.bashrc
No action taken.
Added mamba to /home/mmh/.bashrc
==> For changes to take effect, close and re-open your current shell. <==
If you'd prefer that conda's base environment not be activated on startup,
set the auto_activate_base parameter to false:
conda config --set auto_activate_base false
Thank you for installing OpenFEforge!
After the installer completes, close and reopen your shell.
To check if your path is set up correctly, run ``which python``; your output should look something like this ::
(base) $ which python
/home/mmh/openfeforge/bin/python
.. note::
Your path will be different, but the important part is ``openfeforge/bin/python``
Now the CLI tool should work as well ::
(base) $ openfe --help
Usage: openfe [OPTIONS] COMMAND [ARGS]...
This is the command line tool to provide easy access to functionality from
the OpenFE Python library.
Options:
--version Show the version and exit.
--log PATH logging configuration file
-h, --help Show this message and exit.
Network Planning Commands:
plan-rhfe-network Plan a relative hydration free energy network, saved as
JSON files for the quickrun command.
plan-rbfe-network Plan a relative binding free energy network, saved as
JSON files for the quickrun command.
view-ligand-network Visualize a ligand network
Quickrun Executor Commands:
gather Gather result jsons for network of RFE results into a TSV file
quickrun Run a given transformation, saved as a JSON file
Miscellaneous Commands:
fetch Fetch tutorial or other resource.
charge-molecules Generate partial charges for a set of molecules.
test Run the OpenFE test suite
To make sure everything is working, :ref:`run the tests <testing>`.
With that, you should be ready to use **openfe**!
.. _installation:containers:
Containerized Distributions
----------------------------
We provide an official docker and Apptainer (formerly Singularity) image.
The docker image is tagged with the version of **openfe** on the image and can be pulled with ::
$ docker pull ghcr.io/openfreeenergy/openfe:latest
The Apptainer image is pre-built and can be pulled with ::
$ singularity pull oras://ghcr.io/openfreeenergy/openfe:latest-apptainer
.. warning::
For production use, we recommend using version tags to prevent disruptions in workflows e.g.
.. parsed-literal::
$ docker pull ghcr.io/openfreeenergy/openfe:\ |version|
$ singularity pull oras://ghcr.io/openfreeenergy/openfe:\ |version|-apptainer
We recommend testing the container to ensure that it can access a GPU (if desired).
This can be done with the following command ::
$ singularity run --nv openfe_latest-apptainer.sif python -m openmm.testInstallation
OpenMM Version: 8.0
Git Revision: a7800059645f4471f4b91c21e742fe5aa4513cda
There are 3 Platforms available:
1 Reference - Successfully computed forces
2 CPU - Successfully computed forces
3 CUDA - Successfully computed forces
Median difference in forces between platforms:
Reference vs. CPU: 6.29328e-06
Reference vs. CUDA: 6.7337e-06
CPU vs. CUDA: 7.44698e-07
All differences are within tolerance.
The ``--nv`` flag is required for the Apptainer image to access the GPU on the host.
Your output may produce different values for the forces, but should list the CUDA platform if everything is working properly.
You can access the **openfe** CLI from the Singularity image with ::
$ singularity run --nv openfe_latest-apptainer.sif openfe --help
To make sure everything is working, run the tests ::
$ singularity run --nv openfe_latest-apptainer.sif openfe test
You can also run the long tests with ``openfe test --long``, as explained in `Testing Your Installation`_.
With that, you should be ready to use **openfe**!
.. note::
If building a custom docker image, you may need to add ``--ulimit nofile=262144:262144`` to the ``docker build`` command.
See this `issue `_ for details.
HPC Environments
----------------
When using High Performance Computing resources, jobs are typically submitted to a queue from a "login node" and then run at a later time, often on different hardware and in a different software environment.
This can complicate installation as getting something working on the login node does not guarantee it will work in the job.
We recommend using `Apptainer (formerly Singularity) `_ when running **openfe** workflows in HPC environments.
These images provide a software environment that is isolated from the host, which can make workflow execution easier to set up and more reproducible.
See our guide on :ref:`containers <installation:containers>` for how to get started using Apptainer/Singularity.
.. _installation:mamba_hpc:
``micromamba`` in HPC Environments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. _virtual packages: https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-virtual.html#managing-virtual-packages
We recommend using a :ref:`container <installation:containers>` to install **openfe** in HPC environments.
Nonetheless, **openfe** can be installed via Conda Forge on these environments also.
Conda Forge distributes its own CUDA binaries for interfacing with the GPU, rather than use the host drivers.
``conda``, ``mamba`` and ``micromamba`` all use `virtual packages`_ to detect and specify which version of CUDA should be installed.
This is a common point of difference in hardware between the login and job nodes in an HPC environment.
In order to determine the correct ``cuda-version`` version, we recommend connecting to the node where the simulation will be executed and running ``nvidia-smi``.
For example ::
$ nvidia-smi
Tue Mar 31 19:46:32 2026
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 590.48.01 Driver Version: 590.48.01 CUDA Version: 13.1 |
+-----------------------------------------+------------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+========================+======================|
| 0 NVIDIA A100 80GB PCIe On | 00000000:65:00.0 Off | 0 |
| N/A 32C P0 44W / 300W | 0MiB / 81920MiB | 0% Default |
| | | Disabled |
+-----------------------------------------+------------------------+----------------------+
+-----------------------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=========================================================================================|
| No running processes found |
+-----------------------------------------------------------------------------------------+
In this output of ``nvidia-smi``, we can see ``CUDA Version: 13.1`` in the upper right, which means the installed driver will support a CUDA version up to ``13.1``.
To install a version of **openfe** which is compatible with CUDA ``13.1``, run:
.. parsed-literal::
$ micromamba create -n openfe cuda-version=13.1 openfe=\ |version|
Developer install
-----------------
If you're going to be developing for **openfe**, you will want an
installation where your changes to the code are immediately reflected in the
functionality. This is called a "developer" or "editable" installation.
Getting a developer installation for **openfe** requires first installing the
requirements, and then creating the editable installation. We recommend
doing that with ``micromamba`` using the following procedure:
First, clone the **openfe** repository, and switch into its root directory::
$ git clone https://github.com/OpenFreeEnergy/openfe.git
$ cd openfe
Next create a ``conda`` environment containing the requirements from the
specification in that directory::
$ micromamba create -f environment.yml
Then activate the openfe environment with::
$ micromamba activate openfe_env
Finally, create the editable installation::
$ python -m pip install --no-deps -e .
Note the ``.`` at the end of that command, which indicates the current
directory.
.. _testing:
Testing Your Installation
-------------------------
After installing **openfe**, make sure everything is working as expected by running the test suite with ::
$ openfe test
The test suite contains several hundred individual tests.
This will take a few minutes, and all tests should complete with status either passed, skipped, or xfailed (expected fail).
The very first time you run this, the initial check that you can import ``openfe`` will take a while, because some code is compiled the first time it is encountered.
That compilation only happens once per installation, and so subsequent calls to ``openfe`` will be faster.
A more expansive test suite can be run using ::
$ openfe test --long
This test suite contains several hundred individual tests.
This may take up to an hour, and all tests should complete with status either passed, skipped, or xfailed (expected fail).
This "long" test suite should be run as a job on the compute hardware intended to run openfe jobs, as it will test GPU specific features.
Troubleshooting Your Installation
---------------------------------
We have created a script that can be run locally to assist in troubleshooting errors.
The script does not upload any information and the output may be inspected before the output is sent to us.
We recommend running the script in the same environment where the error was observed.
For example, if you had an error when creating a system on your local workstation, run the script locally with the same conda environment active as when the error occurred.
If the error occurred when running the job on an HPC resource, then run the script (ideally) on the same node where the problem occurred.
This helps to debug issues such as a CUDA and NVIDIA driver mismatch (which would be impossible to diagnose if the script was run on a login node without a GPU).
The script is available here: https://github.com/OpenFreeEnergy/openfe/blob/main/devtools/debug_openmm.sh
For your convenience, this command will download the script and save the output as ``debug.log``:
.. parsed-literal::
$ bash -c "$(curl -Ls https://raw.githubusercontent.com/OpenFreeEnergy/openfe/main/devtools/debug_openmm.sh)" | tee -a debug.log
The output of the script will also be printed to standard out as it is executed.
While no sensitive information is extracted, it is good practice to review the output before sending it or posting it to ensure that nothing needs to be redacted.
For example, if your python path was ``/data/SECRET_COMPOUND_NAME/python`` then that would show up in ``debug.log``.
Common Errors
-------------
.. parsed-literal::
openmm.OpenMMException: Error loading CUDA module: CUDA_ERROR_UNSUPPORTED_PTX_VERSION (222)
This error likely means that the CUDA version that ``openmm`` was built with is incompatible with the CUDA driver.
Try re-making the environment while specifying the correct CUDA toolkit version for your hardware and driver.
See :ref:`installation:mamba_hpc` for more details.
Optional dependencies
---------------------
Certain functionalities are only available if you also install other,
optional packages.
* **perses tools**: To use perses, you need to install perses and OpenEye,
and you need a valid OpenEye license. To install both packages, use::
$ mamba install -c openeye perses openeye-toolkits
Supported Hardware
------------------
We currently support the following CPU architectures:
* ``linux-64``
* ``osx-arm64``
For simulation preparation, any supported platform is suitable.
We test our software regularly by performing vacuum transformations on ``linux-64`` using the OpenMM CUDA platform.
While OpenMM supports OpenCL, we do not regularly test that platform (the CUDA platform is more performant) so we do not recommend using that platform without performing your own verification of correctness.
For production use, we recommend the ``linux-64`` platform with NVIDIA GPUs for optimal performance.
When using an OpenMM based protocol on NVIDIA GPUs, we recommend driver version ``525.60.13`` or greater.
The minimum driver version required when installing from conda-forge is ``450.36.06``, but newer versions of OpenMM may not support that driver version as CUDA 11 will be removed from the build matrix.
Miniforge Installation Guide
----------------------------
.. _Miniforge: https://github.com/conda-forge/miniforge?tab=readme-ov-file#miniforge
`Miniforge`_ provides minimal installers for either ``conda`` or ``mamba``, and enables easy installation of other software that ``openfe`` needs, such as OpenMM and AmberTools.
We recommend using ``miniforge`` to install ``mamba`` because it is faster than ``conda`` and comes preconfigured to use ``conda-forge``.
To install and configure ``miniforge``, you need to know your operating system, your machine architecture (output of ``uname -m``), and your shell (in most cases, can be determined from ``echo $SHELL``).
Select your operating system and architecture from the tool below, and run the commands it suggests.
.. raw:: html
You should then close your current session and open a fresh login to ensure that everything is properly registered.
You can now proceed to use ``mamba`` commands as instructed above.
================================================
FILE: docs/make.bat
================================================
@ECHO OFF
REM Switch to the directory that contains this script; popd at :end restores it.
pushd %~dp0
REM Command file for Sphinx documentation
REM Use the sphinx-build found on PATH unless SPHINXBUILD is already set.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
REM Source and build directories handed to the builder below.
set SOURCEDIR=.
set BUILDDIR=_build
REM With no first argument, jump to :help, which runs the builder's help target.
if "%1" == "" goto help
REM Probe run with all output discarded. The errorlevel check that follows is
REM true for 9009 or higher, which this script treats as "command not found":
REM it prints installation guidance and exits with status 1.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
REM Run the target named by the first argument through the builder's -M mode,
REM forwarding any extra options held in SPHINXOPTS and O.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
REM Reached only when the script is called without a target.
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
REM Common exit point: undo the pushd performed at the top of the script.
:end
popd
================================================
FILE: docs/reference/api/alchemical_network_planning.rst
================================================
.. _Alchemical Network Planning:
Simulation Campaign Planning
============================
While a :class:`LigandNetwork` describes a network of ligands and their atom
mappings, an :class:`AlchemicalNetwork` describes a single replicate of a
simulation campaign. It includes all the information needed to perform the
simulation, and so implicitly includes the :class:`LigandNetwork`.
Alchemical Simulations
~~~~~~~~~~~~~~~~~~~~~~
Descriptions of anticipated alchemical simulation campaigns.
.. module:: openfe
:noindex:
.. autosummary::
:nosignatures:
:toctree: generated/
Transformation
AlchemicalNetwork
Alchemical Network Planners
---------------------------
Alchemical network planners are objects that pull all the ideas in OpenFE
into a quick setup for simulation. The goal is to create the
:class:`.AlchemicalNetwork` that represents an entire simulation campaign,
starting from a minimal amount of user input.
.. module:: openfe.setup
:noindex:
.. autosummary::
:nosignatures:
:toctree: generated/
RBFEAlchemicalNetworkPlanner
RHFEAlchemicalNetworkPlanner
================================================
FILE: docs/reference/api/atom_mappers.rst
================================================
.. _Atom Mappers:
Atom Mappings
=============
Tools for mapping atoms in one molecule to those in another. Used to generate efficient ligand networks.
.. module:: openfe.setup.atom_mapping
.. rubric:: Abstract Base Class
.. autosummary::
:nosignatures:
:toctree: generated/
LigandAtomMapper
.. rubric:: Implementations
.. autosummary::
:nosignatures:
:toctree: generated/
KartografAtomMapper
LomapAtomMapper
PersesAtomMapper
.. rubric:: Data Types
.. autosummary::
:nosignatures:
:toctree: generated/
LigandAtomMapping
.. _Atom Map Scorers:
Atom Map Scorers
----------------
Scoring functions for a mapping between ligands. These are used as objective functions for :any:`Ligand Network Planners`.
Lomap Scorers
~~~~~~~~~~~~~
Scorers implemented by the `LOMAP `_ package.
.. apparently we need the atom_mapping because internally autofunction is
trying ``import openfe.setup.lomap_scorers``, which doesn't work (whereas
``from openfe.setup import lomap_scorers`` does)
.. module:: openfe.setup.atom_mapping.lomap_scorers
.. autosummary::
:nosignatures:
:toctree: generated/
default_lomap_score
ecr_score
mcsr_score
mncar_score
atomic_number_score
hybridization_score
sulfonamides_score
heterocycles_score
transmuting_methyl_into_ring_score
transmuting_ring_sizes_score
Perses Scorers
~~~~~~~~~~~~~~
Scorers implemented by the `Perses `_ package.
.. module:: openfe.setup.atom_mapping.perses_scorers
.. autosummary::
:nosignatures:
:toctree: generated/
default_perses_scorer
================================================
FILE: docs/reference/api/defining_and_executing_simulations.rst
================================================
.. _reference_execution:
Defining and Executing Simulations
==================================
.. _executors:
Executing Simulations
---------------------
.. module:: openfe
:noindex:
.. autosummary::
:nosignatures:
:toctree: generated/
execute_DAG
General classes
---------------
.. module:: openfe
:noindex:
.. autosummary::
:nosignatures:
:toctree: generated/
ProtocolDAG
ProtocolUnitResult
ProtocolUnitFailure
ProtocolDAGResult
Specialised classes
-------------------
These classes are abstract classes that are specialised (subclassed) for an individual Protocol.
.. module:: openfe
:noindex:
.. autosummary::
:nosignatures:
:toctree: generated/
Protocol
ProtocolUnit
ProtocolResult
================================================
FILE: docs/reference/api/index.rst
================================================
.. _api:
.. note::
We have reproduced API documentation from the `gufe`_ package here for convenience.
`gufe`_ serves as a foundation layer for openfe, providing abstract base classes and object models, and so might be more useful for developers.
Python API Reference
====================
.. toctree::
:maxdepth: 2
systems_and_components
atom_mappers
ligand_network
alchemical_network_planning
defining_and_executing_simulations
openmm_rfe
openmm_solvation_afe
openmm_binding_afe
openmm_septop
openmm_md
openmm_protocol_settings
.. _gufe: https://gufe.openfree.energy/en/stable/api.html
================================================
FILE: docs/reference/api/ligand_network.rst
================================================
Ligand Network Tools
====================
.. module:: openfe.setup
:noindex:
Ligand Network
--------------
A network of mutations between ligands.
.. autosummary::
:nosignatures:
:toctree: generated/
LigandNetwork
.. _Ligand Network Planners:
Network Planners
~~~~~~~~~~~~~~~~
.. module:: openfe.setup.ligand_network_planning
Functions that build a :class:`.LigandNetwork` from a collection of :class:`SmallMoleculeComponents` by optimizing over a `scoring function `_.
.. autosummary::
:nosignatures:
:toctree: generated/
generate_radial_network
generate_maximal_network
generate_minimal_spanning_network
generate_minimal_redundant_network
generate_lomap_network
.. _Ligand Network Loaders:
Network Loaders
~~~~~~~~~~~~~~~
Functions to load a :class:`.LigandNetwork` from equivalent classes in other packages, or to specify one by hand.
.. autosummary::
:nosignatures:
:toctree: generated/
generate_network_from_names
generate_network_from_indices
load_orion_network
load_fepplus_network
================================================
FILE: docs/reference/api/openmm_binding_afe.rst
================================================
OpenMM Absolute Binding Free Energy Protocol
============================================
.. _afe binding protocol api:
This section provides details about the OpenMM Absolute Binding Free Energy Protocol
implemented in OpenFE.
Protocol API specification
--------------------------
.. module:: openfe.protocols.openmm_afe.equil_binding_afe_method
.. autosummary::
:nosignatures:
:toctree: generated/
AbsoluteBindingProtocol
ABFEComplexAnalysisUnit
ABFEComplexSetupUnit
ABFEComplexSimUnit
ABFESolventAnalysisUnit
ABFESolventSetupUnit
ABFESolventSimUnit
AbsoluteBindingProtocolResult
Protocol Settings
-----------------
Below are the settings which can be tweaked in the protocol. The default settings (accessed using :meth:`AbsoluteBindingProtocol.default_settings`) will automatically populate settings which we have found to be useful for running binding free energy calculations. There will however be some cases (such as when calculating difficult to converge systems) where you will need to tweak some of the following settings.
.. module:: openfe.protocols.openmm_afe.equil_afe_settings
.. autopydantic_model:: AbsoluteBindingSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:exclude-members: get_defaults
:member-order: bysource
================================================
FILE: docs/reference/api/openmm_md.rst
================================================
OpenMM Molecular Dynamics (MD) Protocol
=======================================
.. _md protocol api:
A Protocol for running MD simulation using OpenMM.
Protocol API Specification
--------------------------
.. module:: openfe.protocols.openmm_md
.. autosummary::
:nosignatures:
:toctree: generated/
PlainMDProtocol
PlainMDSetupUnit
PlainMDSimulationUnit
PlainMDProtocolResult
Protocol Settings
-----------------
.. module:: openfe.protocols.openmm_md.plain_md_settings
.. autopydantic_model:: PlainMDProtocolSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:exclude-members: get_defaults
:member-order: bysource
================================================
FILE: docs/reference/api/openmm_protocol_settings.rst
================================================
OpenMM Protocol Settings
========================
.. _openmm protocol settings api:
This page documents the Settings classes used by OpenMM-based Protocols.
Details on which of these Settings classes are used by a given Protocol
can be found on the individual Protocol API reference documentation pages:
* :ref:`OpenMM Absolute Solvation Free Energy <afe solvation protocol api>`
* :ref:`OpenMM Relative Free Energy <rfe protocol api>`
* :ref:`OpenMM Relative Free Energy using SepTop <septop protocol api>`
* :ref:`OpenMM Molecular Dynamics Protocol <md protocol api>`
Shared OpenMM Protocol Settings
-------------------------------
The following are Settings classes which are shared between multiple
OpenMM-based Protocols. Please note that not all Protocols use these
Settings classes.
.. module:: openfe.protocols.openmm_utils.omm_settings
.. autopydantic_model:: IntegratorSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: MDOutputSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: MDSimulationSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: OpenMMEngineSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: OpenFFPartialChargeSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: OpenMMSolvationSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: OpenMMSystemGeneratorFFSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: ThermoSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
Shared MultiState OpenMM Protocol Settings
------------------------------------------
Protocol Settings shared between MultiState simulation protocols.
These currently include the following Protocols:
* :ref:`OpenMM Absolute Solvation Free Energy <afe solvation protocol api>`
* :ref:`OpenMM Relative Free Energy <rfe protocol api>`
* :ref:`OpenMM Relative Free Energy using SepTop <septop protocol api>`
.. autopydantic_model:: MultiStateOutputSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: MultiStateSimulationSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
================================================
FILE: docs/reference/api/openmm_rfe.rst
================================================
OpenMM Relative Free Energy Protocol
====================================
.. _rfe protocol api:
This section provides details about the OpenMM Relative Free Energy Protocol
implemented in OpenFE.
Protocol API specification
--------------------------
.. module:: openfe.protocols.openmm_rfe.equil_rfe_methods
.. autosummary::
:nosignatures:
:toctree: generated/
RelativeHybridTopologyProtocol
HybridTopologySetupUnit
HybridTopologyMultiStateSimulationUnit
HybridTopologyMultiStateAnalysisUnit
RelativeHybridTopologyProtocolResult
Protocol Settings
-----------------
Below are the settings which can be tweaked in the protocol. The default settings (accessed using :meth:`RelativeHybridTopologyProtocol.default_settings`) will automatically populate settings which we have found to be useful for running relative binding free energies using explicit solvent. There will however be some cases (such as when doing gas phase calculations) where you will need to tweak some of the following settings.
.. autopydantic_model:: openfe.protocols.openmm_rfe.equil_rfe_settings.RelativeHybridTopologyProtocolSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:exclude-members: get_defaults
:member-order: bysource
Protocol Specific Settings Classes
----------------------------------
Below are Settings classes which are unique to the ``RelativeHybridTopologyProtocol``.
.. autopydantic_model:: openfe.protocols.openmm_rfe.equil_rfe_settings.AlchemicalSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: openfe.protocols.openmm_rfe.equil_rfe_settings.LambdaSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
================================================
FILE: docs/reference/api/openmm_septop.rst
================================================
OpenMM Separated Topologies Protocol
====================================
.. _septop protocol api:
This section provides details about the OpenMM Separated Topologies Protocol
implemented in OpenFE.
Protocol API specification
--------------------------
.. module:: openfe.protocols.openmm_septop.equil_septop_method
.. autosummary::
:nosignatures:
:toctree: generated/
SepTopProtocol
SepTopComplexSetupUnit
SepTopComplexRunUnit
SepTopComplexAnalysisUnit
SepTopSolventSetupUnit
SepTopSolventRunUnit
SepTopSolventAnalysisUnit
SepTopProtocolResult
Protocol Settings
-----------------
Below are the settings which can be tweaked in the protocol. The default settings (accessed using :meth:`SepTopProtocol.default_settings`) will automatically populate settings which we have found to be useful for running a Separated Topologies free energy calculation. There will however be some cases (such as when calculating difficult to converge systems) where you will need to tweak some of the following settings.
.. module:: openfe.protocols.openmm_septop.equil_septop_settings
.. autopydantic_model:: SepTopSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:exclude-members: get_defaults
:member-order: bysource
Protocol Specific Settings Classes
----------------------------------
Below are Settings classes which are unique to the ``SepTopProtocol``.
.. autopydantic_model:: AlchemicalSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: LambdaSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: SepTopEquilOutputSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
================================================
FILE: docs/reference/api/openmm_solvation_afe.rst
================================================
OpenMM Absolute Solvation Free Energy Protocol
==============================================
.. _afe solvation protocol api:
This section provides details about the OpenMM Absolute Solvation Free Energy Protocol
implemented in OpenFE.
Protocol API specification
--------------------------
.. module:: openfe.protocols.openmm_afe.equil_solvation_afe_method
.. autosummary::
:nosignatures:
:toctree: generated/
AbsoluteSolvationProtocol
AHFESolventAnalysisUnit
AHFESolventSetupUnit
AHFESolventSimUnit
AHFEVacuumAnalysisUnit
AHFEVacuumSetupUnit
AHFEVacuumSimUnit
AbsoluteSolvationProtocolResult
Protocol Settings
-----------------
Below are the settings which can be tweaked in the protocol. The default settings (accessed using :meth:`AbsoluteSolvationProtocol.default_settings`) will automatically populate settings which we have found to be useful for running solvation free energy calculations. There will however be some cases (such as when calculating difficult to converge systems) where you will need to tweak some of the following settings.
.. autopydantic_model:: openfe.protocols.openmm_afe.equil_afe_settings.AbsoluteSolvationSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:exclude-members: get_defaults
:member-order: bysource
Protocol Specific Settings Classes
----------------------------------
Below are Settings classes which are unique to the ``AbsoluteSolvationProtocol``.
.. autopydantic_model:: openfe.protocols.openmm_afe.equil_afe_settings.AlchemicalSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
.. autopydantic_model:: openfe.protocols.openmm_afe.equil_afe_settings.LambdaSettings
:model-show-json: False
:model-show-field-summary: False
:model-show-config-member: False
:model-show-config-summary: False
:model-show-validator-members: False
:model-show-validator-summary: False
:field-list-validators: False
:inherited-members: SettingsBaseModel
:member-order: bysource
================================================
FILE: docs/reference/api/systems_and_components.rst
================================================
Chemical Systems and Components
===============================
We describe a chemical system as being made up of one or more "components," e.g., solvent, protein, or small molecule. The :class:`.ChemicalSystem` object joins components together into a simulation system.
.. module:: openfe
:noindex:
.. autosummary::
:nosignatures:
:toctree: generated/
ChemicalSystem
Transformation
Component
SmallMoleculeComponent
ProteinComponent
ProteinMembraneComponent
SolventComponent
SolvatedPDBComponent
Chemical System Generators
--------------------------
.. module:: openfe.setup.chemicalsystem_generator
.. autosummary::
:nosignatures:
:toctree: generated/
EasyChemicalSystemGenerator
================================================
FILE: docs/reference/cli/charge_molecules.rst
================================================
.. _cli_charge_molecules:
``charge-molecules`` command
============================
.. click:: openfecli.commands.generate_partial_charges:charge_molecules
:prog: openfe charge-molecules
================================================
FILE: docs/reference/cli/gather.rst
================================================
.. _cli_gather:
``gather`` command
====================
Currently, ``openfe gather`` is only able to gather results from Relative Binding Free Energy (RBFE) calculations.
To gather results from ABFE or SepTop protocols, you may use the experimental :ref:`openfe gather-abfe <gather-abfe>` and :ref:`openfe gather-septop <gather-septop>` CLI commands, but please note that these commands are still under development, are liable to change in future releases, and are meant to be used only for exploratory work.
.. click:: openfecli.commands.gather:gather
:prog: openfe gather
.. _gather-abfe:
.. click:: openfecli.commands.gather_abfe:gather_abfe
:prog: openfe gather-abfe
.. _gather-septop:
.. click:: openfecli.commands.gather_septop:gather_septop
:prog: openfe gather-septop
================================================
FILE: docs/reference/cli/index.rst
================================================
.. _cli-reference:
CLI Reference
=============
.. toctree::
:maxdepth: 1
charge_molecules
plan_rhfe_network
plan_rbfe_network
quickrun
gather
================================================
FILE: docs/reference/cli/plan_rbfe_network.rst
================================================
.. _cli_plan-rbfe-network:
``plan-rbfe-network`` command
=============================
.. click:: openfecli.commands.plan_rbfe_network:plan_rbfe_network
:prog: openfe plan-rbfe-network
================================================
FILE: docs/reference/cli/plan_rhfe_network.rst
================================================
.. _cli_plan-rhfe-network:
``plan-rhfe-network`` command
=============================
.. click:: openfecli.commands.plan_rhfe_network:plan_rhfe_network
:prog: openfe plan-rhfe-network
================================================
FILE: docs/reference/cli/quickrun.rst
================================================
.. _cli_quickrun:
``quickrun`` command
====================
.. click:: openfecli.commands.quickrun:quickrun
:prog: openfe quickrun
================================================
FILE: docs/reference/index.rst
================================================
Reference
=========
This contains details of the Python API as well as a reference to the
command line interface.
.. note::
We have reproduced API documentation from the `gufe`_ package here for convenience.
`gufe`_ serves as a foundation layer for openfe, providing abstract base classes and object models, and so might be more useful for developers.
.. toctree::
:maxdepth: 2
api/index
cli/index
.. _gufe: https://gufe.readthedocs.io/en/latest/api.html
================================================
FILE: docs/tutorials/.gitignore
================================================
assets/
inputs/
================================================
FILE: docs/tutorials/abfe_analysis_tutorial.nblink
================================================
{
"path": "../ExampleNotebooks/abfe_tutorial/abfe_analysis.ipynb"
}
================================================
FILE: docs/tutorials/abfe_tutorial.nblink
================================================
{
"path": "../ExampleNotebooks/abfe_tutorial/abfe_tutorial.ipynb",
"extra-media": [
"../ExampleNotebooks/abfe_tutorial/abfe-cycle.png"
]
}
================================================
FILE: docs/tutorials/ahfe_tutorial.nblink
================================================
{
"path": "../ExampleNotebooks/ahfe_tutorial/ahfe_tutorial.ipynb",
"extra-media": [
"../ExampleNotebooks/ahfe_tutorial/ahfe_cycle.png"
]
}
================================================
FILE: docs/tutorials/charge_molecules_cli_tutorial.rst
================================================
.. _charge_molecules_cli_tutorial:
.. include:: /ExampleNotebooks/cli_tutorials/cli_charge_molecules.md
:parser: myst_parser.sphinx_
================================================
FILE: docs/tutorials/index.rst
================================================
Tutorials
=========
.. todo: make sure we can inline the tutorial, for now we only provide links
Below is a collection of tutorials that demonstrate key elements of OpenFE tooling.
You can clone the `Example Notebooks Repository <https://github.com/OpenFreeEnergy/ExampleNotebooks>`_ to explore any of these tutorials interactively.
Relative Free Energies
----------------------
- :any:`Python API Showcase <showcase_notebook>`: Start here! An introduction to OpenFE's Python API and approach to performing a relative binding free energy calculation.
- :any:`RBFE using the Python API <rbfe_python_tutorial>`: A step-by-step tutorial for using the Python API to calculate relative binding free energies for TYK2.
- :ref:`RBFE using the CLI <rbfe_cli_tutorial>`: A step-by-step tutorial for using the OpenFE command line interface (CLI) to calculate relative binding free energies for TYK2.
- :any:`RBFE with membrane systems <rbfe_membrane_protein>`: A step-by-step guide to setting up an RBFE calculation with special considerations for membrane systems.
Absolute Free Energies
----------------------
- :any:`Absolute Binding Free Energy Protocol <abfe_tutorial>`: A walk-through of calculating the absolute binding free energy of toluene to T4 Lysozyme.
- :any:`Absolute Solvation Free Energy Protocol <ahfe_tutorial>`: A walk-through of calculating the hydration free energy of a benzene ligand.
Relative Free Energies using Separated Topologies
-------------------------------------------------
- :any:`SepTop Protocol <septop_tutorial>`: A walk-through of calculating the relative binding free energy between TYK2 ligands using a Separated Topologies approach.
Molecular Dynamics (MD)
-----------------------
- :any:`MD protocol <md_tutorial>`: A walk-through of running a conventional (non-alchemical) MD simulation of benzene bound to T4-lysozyme L99A.
Post-Simulation Analysis
------------------------
- :any:`Cinnabar tutorial <plotting_with_cinnabar>`: A tutorial for using the `cinnabar <https://github.com/OpenFreeEnergy/cinnabar>`_ Python package to analyze (e.g. generating MLE estimates of absolute free energies) and plot networks of relative free energy results.
Generating Partial Charges
--------------------------
.. todo: this should be in cookbook
- :ref:`Generating Partial Charges CLI tutorial <charge_molecules_cli_tutorial>`: how to use the CLI to assign and store partial charges for small molecules which can be used throughout the OpenFE ecosystem.
.. toctree::
:maxdepth: 1
:hidden:
showcase_notebook
rbfe_python_tutorial
rbfe_cli_tutorial
rbfe_membrane_protein
abfe_tutorial
abfe_analysis_tutorial
ahfe_tutorial
septop_tutorial
septop_analysis_tutorial
md_tutorial
plotting_with_cinnabar
charge_molecules_cli_tutorial
================================================
FILE: docs/tutorials/md_tutorial.nblink
================================================
{
"path": "../ExampleNotebooks/openmm_md/plain_md.ipynb",
"extra-media": [
"../ExampleNotebooks/openmm_md/assets/"
]
}
================================================
FILE: docs/tutorials/plotting_with_cinnabar.nblink
================================================
{
"path": "../ExampleNotebooks/plotting_rbfes_with_cinnabar/PlottingFreeEnergiesUsingCinnabar.ipynb"
}
================================================
FILE: docs/tutorials/rbfe_cli_tutorial.rst
================================================
.. _rbfe_cli_tutorial:
.. include:: /ExampleNotebooks/rbfe_tutorial/cli_tutorial.md
:parser: myst_parser.sphinx_
================================================
FILE: docs/tutorials/rbfe_membrane_protein.nblink
================================================
{
"path": "../ExampleNotebooks/membranes/rbfe_membrane_protein.ipynb"
}
================================================
FILE: docs/tutorials/rbfe_python_tutorial.nblink
================================================
{
"path": "../ExampleNotebooks/rbfe_tutorial/rbfe_python_tutorial.ipynb"
}
================================================
FILE: docs/tutorials/septop_analysis_tutorial.nblink
================================================
{
"path": "../ExampleNotebooks/openmm_septop/septop_analysis.ipynb"
}
================================================
FILE: docs/tutorials/septop_tutorial.nblink
================================================
{
"path": "../ExampleNotebooks/openmm_septop/septop_tutorial.ipynb",
"extra-media": [
"../ExampleNotebooks/openmm_septop/septop_cycle.png"
]
}
================================================
FILE: docs/tutorials/showcase_notebook.nblink
================================================
{
"path": "../ExampleNotebooks/showcase/openfe_showcase.ipynb",
"extra-media": [
"../ExampleNotebooks/showcase/tyk2.png",
"../ExampleNotebooks/showcase/OFE-color-horizontal.png",
"../ExampleNotebooks/showcase/inputs"
]
}
================================================
FILE: environment.yml
================================================
name: openfe_env
channels:
- conda-forge
dependencies:
- cinnabar ~=0.5.0
- click >=8.2.0
- coverage
- dask>=2025 # temporary fix for https://github.com/openforcefield/openff-units/issues/140
- duecredit<0.10
- kartograf>=1.2.0
- konnektor~=0.2.0
- lomap2>=3.2.1
- networkx
- numpy
- openfe-analysis>=0.4.0 # min pin https://github.com/OpenFreeEnergy/openfe/issues/1834#issuecomment-3920079481, no max to check issues with new versions
- openff-interchange-base >=0.5.0,!= 0.5.1 # https://github.com/openforcefield/openff-interchange/issues/1450 and https://github.com/OpenFreeEnergy/openfe/pull/1901
- openff-nagl-base >=0.3.3
- openff-nagl-models>=0.1.2
- openff-toolkit-base >=0.16.2
- openff-units==0.3.1 # https://github.com/OpenFreeEnergy/openfe/pull/1374
- openmm ~=8.4.0 # omit 8.3.0 and 8.3.1 due to https://github.com/openmm/openmm/pull/5069
- openmmforcefields >=0.15.1 # min needed for https://github.com/openmm/openmmforcefields/pull/414
- openmmtools >=0.26 # fix to support membrane barostat: https://github.com/choderalab/openmmtools/pull/798
- packaging
- pandas
- parmed >=4.3.1 # fix to support numpy >=2.3: https://github.com/ParmEd/ParmEd/pull/1387
- perses>=0.10.3
- plugcli
- pint>=0.24.0
- pip
- pooch >= 1.9.0 # min needed for https://github.com/fatiando/pooch/issues/502
- py3dmol
- pydantic >= 2.0.0, <2.12.0 # https://github.com/openforcefield/openff-interchange/issues/1346
- pygraphviz
- pytest
- pytest-xdist
- pytest-cov
- pytest-regressions
- pytest-rerunfailures
- pyyaml
- rdkit
- rich
- tqdm
- typing-extensions
- zstandard
# Issue #443
- pymbar>4.0
# docs
- autodoc-pydantic>=2.0
- pydata-sphinx-theme
- sphinx-click
- sphinx-toolbox
# Control blas/openmp threads
- threadpoolctl
- pip:
- git+https://github.com/OpenFreeEnergy/gufe@main
- run_constrained:
# drop this pin when handled upstream in espaloma-feedstock
- smirnoff99frosst>=1.1.0.1 #https://github.com/openforcefield/smirnoff99Frosst/issues/109
================================================
FILE: news/TEMPLATE.rst
================================================
**Added:**
*
**Changed:**
*
**Deprecated:**
*
**Removed:**
*
**Fixed:**
*
**Security:**
*
================================================
FILE: production/Dockerfile
================================================
FROM mambaorg/micromamba:1.4.1
LABEL org.opencontainers.image.source=https://github.com/OpenFreeEnergy/openfe
LABEL org.opencontainers.image.description="A Python package for executing alchemical free energy calculations."
LABEL org.opencontainers.image.licenses=MIT
# OpenFE Version we want to build
ARG VERSION
# install ps
USER root
RUN apt-get update && apt-get install -y --no-install-recommends \
procps \
&& rm -rf /var/lib/apt/lists/*
USER $MAMBA_USER
# Don't buffer stdout & stderr streams, so if there is a crash no partial buffer output is lost
# https://docs.python.org/3/using/cmdline.html#cmdoption-u
ENV PYTHONUNBUFFERED=1
COPY --chown=$MAMBA_USER:$MAMBA_USER production/environment.yml /tmp/env.yaml
RUN micromamba install -y -n base git "openfe==$VERSION" -f /tmp/env.yaml && \
micromamba clean --all --yes
# Ensure that conda environment is automatically activated
# https://github.com/mamba-org/micromamba-docker#running-commands-in-dockerfile-within-the-conda-environment
ARG MAMBA_DOCKERFILE_ACTIVATE=1
================================================
FILE: production/environment.yml
================================================
name: openfe_env
channels:
- conda-forge
dependencies:
- cudatoolkit==11.8
- jupyterlab
- notebook
- openfe
- pip
- py3dmol
- pytest
- pytest-xdist
- python==3.12.*
- rdkit==2025.09.1
================================================
FILE: pyproject.toml
================================================
[build-system]
build-backend = "setuptools.build_meta"
requires = [
"setuptools>=77.0.3",
"setuptools-scm>=8",
]
[project]
name = "openfe"
description = ""
readme = "README.md"
license = "MIT"
license-files = [ "LICENSE" ]
authors = [ { name = "The OpenFE developers", email = "openfreeenergy@omsf.io" } ]
requires-python = ">=3.11"
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Science/Research",
"Operating System :: POSIX",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Topic :: Scientific/Engineering :: Chemistry",
]
dynamic = [ "version" ]
urls = { Homepage = "https://github.com/OpenFreeEnergy/openfe" }
scripts.openfe = "openfecli.cli:main"
[tool.setuptools]
zip-safe = false
include-package-data = true
package-data.openfe = [ '"./src/openfe/tests/data/lomap_basic/toluene.mol2"' ]
packages.find.where = [ "src" ]
packages.find.namespaces = false
[tool.setuptools_scm]
fallback_version = "0.0.0"
[tool.ruff]
line-length = 100
format.docstring-code-format = true
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
lint.select = [
"E", # pycodestyle errors
"F", # Pyflakes
"I", # isort
"W", # pycodestyle warnings
# "C901" # mccabe complexity TODO: add this back in
# "UP", # TODO: add this in
]
lint.ignore = [
"E402", # module-level import not at top (conflicts w/ isort)
"E501", # line length too long, resolve this for comments
"F401", # unused imports (TODO: we should fix these)
"F811",
"F841",
"UP03", # pyupgrade linting (TODO: we should fix these)
]
lint.isort.known-first-party = [ "openfe" ]
[tool.mypy]
files = "src/openfe" # TODO: add src/openfecli
ignore_missing_imports = true
warn_unused_ignores = true
[tool.coverage]
run.omit = [
"src/*/tests/dev/*py",
"src/*/tests/protocols/test_openmm_rfe_slow.py",
"src/openfe/due.py",
]
report.exclude_lines = [
"-no-cov",
'if __name__ == "__main__"',
"pragma: no cover",
"pragma: no-cover",
"raise NotImplementedError",
]
================================================
FILE: rever.xsh
================================================
$PROJECT = $GITHUB_REPO = 'openfe'
$GITHUB_ORG = 'OpenFreeEnergy'
$ACTIVITIES = ['changelog']
$CHANGELOG_FILENAME = 'docs/CHANGELOG.rst'
$CHANGELOG_TEMPLATE = 'TEMPLATE.rst'
================================================
FILE: src/openfe/__init__.py
================================================
# Before we do anything else, we want to disable JAX
# acceleration by default but if a user has set
# PYMBAR_DISABLE_JAX to some value, we want to keep
# it
import logging
import os
# Module-level logger for the top-level package.
logger = logging.getLogger(__name__)

# A value already present in the environment is left untouched; it is only
# reported at INFO level together with a pointer to the troubleshooting guide.
if "PYMBAR_DISABLE_JAX" in os.environ:
    logger.info(
        f"PYMBAR_DISABLE_JAX set to {os.environ.get('PYMBAR_DISABLE_JAX')}. See https://docs.openfree.energy/en/latest/guide/troubleshooting.html#pymbar-disable-jax for more details"
    )

# setdefault will only set PYMBAR_DISABLE_JAX if it is unset, so the
# default of "TRUE" never overrides a user-provided value.
os.environ.setdefault("PYMBAR_DISABLE_JAX", "TRUE")
# We need to do this first so that we can set up our
# log control since some modules have warnings on import
from openfe.utils import logging_control
# NOTE(review): presumably filters out log records containing the given
# message on the named logger -- confirm against openfe.utils.logging_control.
# Target here: the 64-bit JAX banner logged by "pymbar.mbar_solvers".
logging_control._silence_message(
    msg=[
        "****** PyMBAR will use 64-bit JAX! *******",
    ],
    logger_names=[
        "pymbar.mbar_solvers",
    ],
)
# Same treatment for the timeseries-module notice logged by "pymbar.timeseries".
logging_control._silence_message(
    msg=[
        "Warning on use of the timeseries module:",
    ],
    logger_names=[
        "pymbar.timeseries",
    ],
)
# NOTE(review): presumably appends ``suffix`` to messages emitted by the
# "jax._src.xla_bridge" logger so users are pointed at the troubleshooting
# page -- confirm against openfe.utils.logging_control.
logging_control._append_logger(
    suffix="\n \n[OPENFE]: The simulation is still using the compute platform specified in the settings \n See this URL for more information: https://docs.openfree.energy/en/latest/guide/troubleshooting.html#jax-warnings \n\n",
    logger_names="jax._src.xla_bridge",
)
from importlib.metadata import version
from gufe import (
AlchemicalNetwork,
ChemicalSystem,
Component,
LigandAtomMapping,
NonTransformation,
ProteinComponent,
ProteinMembraneComponent,
SmallMoleculeComponent,
SolvatedPDBComponent,
SolventComponent,
Transformation,
)
from gufe.protocols import (
Protocol,
ProtocolDAG,
ProtocolDAGResult,
ProtocolResult,
ProtocolUnit,
ProtocolUnitFailure,
ProtocolUnitResult,
execute_DAG,
)
from . import analysis, orchestration, setup, utils
from .setup import (
KartografAtomMapper,
LigandAtomMapper,
LigandNetwork,
LomapAtomMapper,
PersesAtomMapper,
ligand_network_planning,
lomap_scorers,
perses_scorers,
)
__version__ = version("openfe")
================================================
FILE: src/openfe/analysis/__init__.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
from . import plotting
================================================
FILE: src/openfe/analysis/plotting.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
import warnings
from typing import Optional, Union
import matplotlib.pyplot as plt
import numpy as np
import numpy.typing as npt
from matplotlib.axes import Axes
from openff.units import Quantity
def plot_lambda_transition_matrix(matrix: npt.NDArray) -> Axes:
    """
    Plot out a transition matrix.

    Each cell is shaded in proportion to its value (relative to the largest
    entry of the matrix) and annotated with the value rounded to two
    decimals. Row ``j`` / column ``i`` of the figure shows ``matrix[j, i]``.

    Parameters
    ----------
    matrix : npt.NDArray
      A nstates by nstates matrix of transition estimates.

    Returns
    -------
    ax : matplotlib.axes.Axes
      An Axes object to plot.

    Warns
    -----
    UserWarning
      If the row sums or the column sums are not all close to 1.0 (as
      judged by ``numpy.allclose`` with its default tolerances). This
      indicates an incorrect overlap/probability matrix.

    Notes
    -----
    Borrowed from `alchemlyb <https://github.com/alchemistry/alchemlyb>`_
    which itself borrows from
    `alchemical-analysis <https://github.com/MobleyLab/alchemical-analysis>`_.
    """
    num_states = len(matrix)

    # Check if any row or column isn't close to 1.0
    # Throw a warning if it's the case
    if not np.allclose(matrix.sum(axis=0), 1.0) or not np.allclose(matrix.sum(axis=1), 1.0):
        wmsg = (
            "Overlap/probability matrix exceeds a sum of 1.0 in one or "
            "more columns or rows of the matrix. This indicates an "
            "incorrect overlap/probability matrix."
        )
        warnings.warn(wmsg)

    _, ax = plt.subplots(figsize=(num_states / 2, num_states / 2))
    ax.axis("off")

    # The largest entry defines the darkest shade; it does not change
    # inside the loops so it is only computed once.
    max_val = matrix.max()

    for i in range(num_states):
        if i != 0:
            ax.axvline(x=i, ls="-", lw=0.5, color="k", alpha=0.25)
            ax.axhline(y=i, ls="-", lw=0.5, color="k", alpha=0.25)
        for j in range(num_states):
            # The box drawn at column i / row j represents matrix[j, i];
            # the shade and the label must both come from that same entry,
            # otherwise they disagree for non-symmetric matrices.
            val = matrix[j, i]

            # Catch if within 0.005 of 0 or 1
            # https://github.com/OpenFreeEnergy/openfe/issues/806
            if val < 0.005:
                # This replicates the same behaviour as alchemical-analysis & alchemlyb
                # i.e. near-zero values will just not be annotated
                val_str = ""
            elif val > 0.995:
                val_str = "{:.2f}".format(val)[:4]
            else:
                # strip the leading "0" so that e.g. 0.25 is shown as ".25"
                val_str = "{:.2f}".format(val)[1:]

            # matplotlib only accepts alpha values in [0, 1]; guard against
            # an all-zero matrix and against tiny negative entries.
            if max_val > 0:
                rel_prob = float(np.clip(val / max_val, 0.0, 1.0))
            else:
                rel_prob = 0.0

            # shade box
            ax.fill_between(
                [i, i + 1],
                [num_states - j, num_states - j],
                [num_states - (j + 1), num_states - (j + 1)],
                color="k",
                alpha=rel_prob,
            )
            # annotate box; dark boxes get white text to stay readable
            ax.annotate(
                val_str,
                xy=(i, j),
                xytext=(i + 0.5, num_states - (j + 0.5)),
                size=8,
                va="center",
                ha="center",
                color=("k" if rel_prob < 0.5 else "w"),
            )

    # annotate axes
    base_settings: dict[str, Union[str, int]] = {
        "size": 10,
        "va": "center",
        "ha": "center",
        "color": "k",
        "family": "sans-serif",
    }
    for i in range(num_states):
        # state index above each column
        ax.annotate(
            text=f"{i}",
            xy=(i + 0.5, 1),
            xytext=(i + 0.5, num_states + 0.5),
            xycoords="data",
            textcoords=None,
            arrowprops=None,
            annotation_clip=None,
            **base_settings,
        )
        # state index to the left of each row
        ax.annotate(
            text=f"{i}",
            xy=(-0.5, num_states - (num_states - 0.5)),
            xytext=(-0.5, num_states - (i + 0.5)),
            xycoords="data",
            textcoords=None,
            arrowprops=None,
            annotation_clip=None,
            **base_settings,
        )

    # lambda symbol in the top-left corner
    ax.annotate(
        r"$\lambda$",
        xy=(-0.5, num_states - (num_states - 0.5)),
        xytext=(-0.5, num_states + 0.5),
        xycoords="data",
        textcoords=None,
        arrowprops=None,
        annotation_clip=None,
        **base_settings,
    )

    # add border
    ax.plot([0, num_states], [0, 0], "k-", lw=2.0)
    ax.plot([num_states, num_states], [0, num_states], "k-", lw=2.0)
    ax.plot([0, num_states], [num_states, num_states], "k-", lw=2.0)
    ax.plot([0, 0], [0, num_states], "k-", lw=2.0)

    return ax
def plot_convergence(
    forward_and_reverse: dict[str, Union[npt.NDArray, Quantity]], units: Quantity
) -> Axes:
    """
    Draw a forward / reverse convergence plot of the free energy estimates.

    Parameters
    ----------
    forward_and_reverse : dict[str, Union[npt.NDArray, Quantity]]
      Must provide the keys ``fractions``, ``forward_DGs``,
      ``forward_dDGs``, ``reverse_DGs`` and ``reverse_dDGs``. The DG / dDG
      entries are iterated and the ``.m`` attribute of each element is
      plotted against ``fractions``.
    units : openff.units.Quantity
      The units the free energies are provided in. ``str(units)`` must be
      one of ``kilojoule_per_mole``, ``kilojoules_per_mole``,
      ``kilocalorie_per_mole`` or ``kilocalories_per_mole``.

    Returns
    -------
    ax : matplotlib.axes.Axes
      An Axes object to plot.

    Raises
    ------
    ValueError
      If ``units`` is not one of the recognised unit names.

    Notes
    -----
    Modified from
    `alchemical analysis <https://github.com/MobleyLab/alchemical-analysis>`_
    """
    # Map the accepted unit names onto the short form shown on the y axis
    unit_labels = {
        "kilojoule_per_mole": "kJ/mol",
        "kilojoules_per_mole": "kJ/mol",
        "kilocalorie_per_mole": "kcal/mol",
        "kilocalories_per_mole": "kcal/mol",
    }

    try:
        axis_units = unit_labels[str(units)]
    except KeyError:
        raise ValueError(
            f"Unknown plotting units {units} passed, acceptable "
            "values are kilojoule(s)_per_mole and "
            "kilocalorie(s)_per_mole"
        )

    _, ax = plt.subplots(figsize=(8, 6))

    # Old style alchemical analysis formatting
    for side in ("bottom", "left"):
        plt.setp(ax.spines[side], color="#D2B9D3", lw=3, zorder=-2)
    for side in ("top", "right"):
        ax.spines[side].set_color("none")
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("left")

    # The shaded band is centred on the last reverse estimate and is as
    # wide as the last reverse error.
    band_halfwidth = forward_and_reverse["reverse_dDGs"][-1].m  # type: ignore
    band_centre = forward_and_reverse["reverse_DGs"][-1].m  # type: ignore
    ax.fill_between(
        [0, 1],
        band_centre - band_halfwidth,
        band_centre + band_halfwidth,
        color="#D2B9D3",
        zorder=1,
    )

    # (estimate key, error key, colour, legend label) for each direction
    series = (
        ("forward_DGs", "forward_dDGs", "#736AFF", "Forward"),
        ("reverse_DGs", "reverse_dDGs", "#C11B17", "Reverse"),
    )
    for dg_key, ddg_key, colour, legend_label in series:
        ax.errorbar(
            forward_and_reverse["fractions"],  # type: ignore
            [val.m for val in forward_and_reverse[dg_key]],  # type: ignore
            yerr=[err.m for err in forward_and_reverse[ddg_key]],  # type: ignore
            color=colour,
            lw=3,
            zorder=2,
            marker="o",
            mfc="w",
            mew=2.5,
            mec=colour,
            ms=8,
            label=legend_label,
        )

    ax.legend(frameon=False)
    ax.set_ylabel(r"$\Delta G$" + f" ({axis_units})")
    ax.set_xlabel("Fraction of uncorrelated samples")

    return ax
def plot_replica_timeseries(
    state_timeseries: npt.NDArray,
    equilibration_iterations: Optional[int] = None,
) -> Axes:
    """
    Plot the state timeseries of a set of replicas.

    Parameters
    ----------
    state_timeseries : npt.NDArray
      A 2D array indexed as ``[iteration, replica]``. Every column is
      drawn as its own scatter series labelled ``replica <index>``, with
      the stored values on the "Lambda state" axis.
    equilibration_iterations : Optional[int]
      The number of iterations used up as equilibration time. When given,
      a dashed vertical line is drawn at this iteration.

    Returns
    -------
    ax : matplotlib.axes.Axes
      An Axes object to plot.
    """
    # One row per replica after transposing
    per_replica = state_timeseries.T
    n_series = len(per_replica)
    iteration_axis = list(range(len(state_timeseries)))

    # The figure gets wider with the number of series
    _, ax = plt.subplots(figsize=(n_series, 4))

    for idx in range(n_series):
        ax.scatter(iteration_axis, per_replica[idx], label=f"replica {idx}", s=8)

    ax.set_xlabel("Number of simulation iterations")
    ax.set_ylabel("Lambda state")
    ax.set_title("Change in replica lambda state over time")

    if equilibration_iterations is not None:
        ax.axvline(
            x=equilibration_iterations,
            color="grey",
            linestyle="--",
            label="equilibration limit",
        )

    # Legend sits outside the plotting area, to the right
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))

    return ax
def plot_2D_rmsd(data: list[list[float]], vmax: float = 5.0) -> plt.Figure:
    """Plots 2D RMSD for many states

    The states are laid out on a grid that is four panels wide; the very
    last panel of the grid is reserved for the shared colorbar.

    Parameters
    ----------
    data : list[list[float]]
      for each state, the 2D RMSD, stored as the N(N-1)//2 values of the
      upper triangle (diagonal excluded) of the symmetric N x N matrix
    vmax : float, optional
      the value to consider "high" in the colourmap to flag bad values,
      defaults to 5.0 (A)

    Returns
    -------
    matplotlib Figure
    """
    twod_rmsd_arrs = []
    for state in data:
        # unpack 2D RMSD data
        # we store N(N-1)//2 values, so find N then make symmetric array
        N = int((1 + np.sqrt(8 * len(state) + 1)) / 2)
        arr = np.zeros((N, N))
        arr[np.triu_indices_from(arr, k=1)] = state
        arr += arr.T

        twod_rmsd_arrs.append(arr)

    nplots = len(data) + 1  # + colorbar

    # plot on 4 x n grid
    nrows = nplots // 4 + (1 if nplots % 4 else 0)

    fig, axes = plt.subplots(nrows, 4)
    # flatten once; the grid always holds at least nplots panels
    flat_axes = axes.flatten()

    for i, (arr, ax) in enumerate(zip(twod_rmsd_arrs, flat_axes)):
        ax.imshow(arr, vmin=0, vmax=vmax, cmap=plt.get_cmap("cividis"))
        ax.axis("off")  # turn off ticks/labels
        ax.set_title(f"State {i}")

    # if we have any leftover plots then we turn them off
    # except the last one!
    # The unused panels are the ones after the last state and before the
    # final (colorbar) panel, so the slice starts at the number of states.
    for ax in flat_axes[len(twod_rmsd_arrs) : -1]:
        ax.set_axis_off()

    # the colour scale is shared (same vmin/vmax everywhere), so the image
    # of the first state is used as the mappable
    plt.colorbar(
        flat_axes[0].images[0],
        cax=flat_axes[-1],
        label="RMSD scale (A)",
        orientation="horizontal",
    )

    fig.suptitle("Protein 2D RMSD")
    fig.tight_layout()

    return fig
def plot_ligand_COM_drift(time: list[float], data: list[list[float]]):
    """Plot the ligand COM drift of every state against time.

    Parameters
    ----------
    time : list[float]
      x values shared by all states; the axis is labelled "Time (ps)"
    data : list[list[float]]
      one series per state; the axis is labelled "Distance (A)"

    Returns
    -------
    matplotlib Figure
    """
    figure, axis = plt.subplots()

    # one line per state, labelled by its position in ``data``
    for state_idx, drift in enumerate(data):
        axis.plot(time, drift, label=f"State {state_idx}")

    axis.set_title("Ligand COM drift")
    axis.set_xlabel("Time (ps)")
    axis.set_ylabel("Distance (A)")
    axis.legend(loc="upper left")

    return figure
def plot_ligand_RMSD(time: list[float], data: list[list[float]]):
    """Plot the ligand RMSD of every state against time.

    Parameters
    ----------
    time : list[float]
      x values shared by all states; the axis is labelled "Time (ps)"
    data : list[list[float]]
      one series per state; the axis is labelled "RMSD (A)"

    Returns
    -------
    matplotlib Figure
    """
    figure, axis = plt.subplots()

    # one line per state, labelled by its position in ``data``
    for state_idx, rmsd in enumerate(data):
        axis.plot(time, rmsd, label=f"State {state_idx}")

    axis.set_title("Ligand RMSD")
    axis.set_xlabel("Time (ps)")
    axis.set_ylabel("RMSD (A)")
    axis.legend(loc="upper left")

    return figure
================================================
FILE: src/openfe/data/__init__.py
================================================
================================================
FILE: src/openfe/data/_downloader.py
================================================
import pooch
from ._registry import zenodo_data_registry
def retrieve_registry_data(zenodo_registry: list[dict], path: str) -> None:
    """Helper function for pulling all test data up-front.

    Parameters
    ----------
    zenodo_registry : list[dict]
      The entries to download. Each entry must provide the keys
      ``base_url``, ``fname`` and ``known_hash``.
    path : str
      path to store the data - usually a pooch.os_cache instance.
    """
    downloader = pooch.DOIDownloader(progressbar=True)

    def _infer_processor(fname: str):
        # Archives are unpacked after download; anything else is kept as-is
        if fname.endswith("tar.gz"):
            return pooch.Untar()
        elif fname.endswith("zip"):
            return pooch.Unzip()
        else:
            return None

    for d in zenodo_registry:
        # The URL is built by plain concatenation, so make sure the DOI and
        # the file name end up separated by exactly one "/" regardless of
        # whether the registry entry carries a trailing slash.
        base_url = d["base_url"]
        if not base_url.endswith("/"):
            base_url += "/"

        pooch.retrieve(
            url=base_url + d["fname"],
            known_hash=d["known_hash"],
            fname=d["fname"],
            processor=_infer_processor(d["fname"]),
            downloader=downloader,
            path=path,
        )
================================================
FILE: src/openfe/data/_registry.py
================================================
import pooch
# Cache location handed back by pooch for the "openfe" application name
POOCH_CACHE = pooch.os_cache("openfe")

# Every entry below describes one downloadable file:
#   base_url   -- "doi:"-prefixed locator of the record
#   fname      -- name of the file within that record
#   known_hash -- "<algorithm>:<hex digest>" of the file
# NOTE(review): the base_url values are inconsistent about the trailing "/";
# anything joining base_url and fname has to account for that.
zenodo_rfe_simulation_nc = {
    "base_url": "doi:10.5281/zenodo.15375081/",
    "fname": "simulation.nc",
    "known_hash": "md5:bc4e842b47de17704d804ae345b91599",
}

zenodo_t4_lysozyme_traj = {
    "base_url": "doi:10.5281/zenodo.15212342",
    "fname": "t4_lysozyme_trajectory.zip",
    "known_hash": "sha256:e985d055db25b5468491e169948f641833a5fbb67a23dbb0a00b57fb7c0e59c8",
}

zenodo_industry_benchmark_systems = {
    "base_url": "doi:10.5281/zenodo.15212342",
    "fname": "industry_benchmark_systems.zip",
    "known_hash": "sha256:2bb5eee36e29b718b96bf6e9350e0b9957a592f6c289f77330cbb6f4311a07bd",
}

zenodo_resume_data = {
    "base_url": "doi:10.5281/zenodo.19694844",
    "fname": "multistate_checkpoints.zip",
    "known_hash": "md5:a6bdceff0c4a2f200538edb17c21d443",
}

zenodo_md_resume_data = {
    "base_url": "doi:10.5281/zenodo.19694944",
    "fname": "checkpoint.xml",
    "known_hash": "md5:0f3957c263b5def8de727c5c419b31b5",
}

# All entries, in the order they were declared above
zenodo_data_registry = [
    zenodo_rfe_simulation_nc,
    zenodo_t4_lysozyme_traj,
    zenodo_industry_benchmark_systems,
    zenodo_resume_data,
    zenodo_md_resume_data,
]
================================================
FILE: src/openfe/due.py
================================================
# emacs: at the end of the file
# ex: set sts=4 ts=4 sw=4 et:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### #
"""
Stub file for a guaranteed safe import of duecredit constructs: if duecredit
is not available.
To use it, place it into your project codebase to be imported, e.g. copy as
cp stub.py /path/tomodule/module/due.py
Note that it might be better to avoid naming it duecredit.py to avoid shadowing
installed duecredit.
Then use in your code as
from .due import due, Doi, BibTeX, Text
See https://github.com/duecredit/duecredit/blob/master/README.md for examples.
Origin: Originally a part of the duecredit
Copyright: 2015-2021 DueCredit developers
License: BSD-2
"""
__version__ = "0.0.9"
class InactiveDueCreditCollector:
    """Inert stand-in for a duecredit collector: every operation is a no-op"""

    # The stub never collects anything
    active = False

    def _donothing(self, *args, **kwargs):
        """Accept any arguments, do nothing and return None"""
        return None

    # All collector entry points share the same no-op implementation
    activate = add = cite = dump = load = _donothing

    def dcite(self, *args, **kwargs):
        """Return a decorator that hands the decorated function back unchanged"""

        def nondecorating_decorator(func):
            return func

        return nondecorating_decorator

    def __repr__(self):
        return f"{self.__class__.__name__}()"
def _donothing_func(*args, **kwargs):
    """Accept any arguments, do nothing and return None"""
    return None
try:
    # Use the real duecredit objects when the package can be imported.
    from duecredit import BibTeX, Doi, Text, Url, due  # lgtm [py/unused-import]

    # An imported ``due`` without a ``cite`` attribute is treated as unusable;
    # raising here routes execution into the fallback below.
    if "due" in locals() and not hasattr(due, "cite"):
        raise RuntimeError("Imported due lacks .cite. DueCredit is now disabled")
except Exception as e:
    # A plain ImportError is handled silently; any other failure is logged
    # on the "duecredit" logger before falling back.
    if not isinstance(e, ImportError):
        import logging

        logging.getLogger("duecredit").error("Failed to import duecredit due to %s" % str(e))
    # Initiate due stub
    due = InactiveDueCreditCollector()
    # The citation entry types all become the same do-nothing callable
    BibTeX = Doi = Url = Text = _donothing_func
# Emacs mode definitions
# Local Variables:
# mode: python
# py-indent-offset: 4
# tab-width: 4
# indent-tabs-mode: nil
# End:
================================================
FILE: src/openfe/orchestration/__init__.py
================================================
================================================
FILE: src/openfe/protocols/__init__.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
================================================
FILE: src/openfe/protocols/openmm_afe/__init__.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""
Run absolute free energy calculations using OpenMM and OpenMMTools.
"""
from .abfe_units import (
ABFEComplexAnalysisUnit,
ABFEComplexSetupUnit,
ABFEComplexSimUnit,
ABFESolventAnalysisUnit,
ABFESolventSetupUnit,
ABFESolventSimUnit,
)
from .afe_protocol_results import (
AbsoluteBindingProtocolResult,
AbsoluteSolvationProtocolResult,
)
from .ahfe_units import (
AHFESolventAnalysisUnit,
AHFESolventSetupUnit,
AHFESolventSimUnit,
AHFEVacuumAnalysisUnit,
AHFEVacuumSetupUnit,
AHFEVacuumSimUnit,
)
from .equil_binding_afe_method import (
AbsoluteBindingProtocol,
AbsoluteBindingSettings,
)
from .equil_solvation_afe_method import (
AbsoluteSolvationProtocol,
AbsoluteSolvationSettings,
)
# Names re-exported by this package, grouped by the protocol they belong to.
__all__ = [
    # Absolute solvation free energy: protocol, settings, result and units
    "AbsoluteSolvationProtocol",
    "AbsoluteSolvationSettings",
    "AbsoluteSolvationProtocolResult",
    "AHFESolventSetupUnit",
    "AHFESolventSimUnit",
    "AHFESolventAnalysisUnit",
    "AHFEVacuumSetupUnit",
    "AHFEVacuumSimUnit",
    "AHFEVacuumAnalysisUnit",
    # Absolute binding free energy: protocol, settings, result and units
    "AbsoluteBindingProtocol",
    "AbsoluteBindingSettings",
    "AbsoluteBindingProtocolResult",
    "ABFEComplexSetupUnit",
    "ABFEComplexSimUnit",
    "ABFEComplexAnalysisUnit",
    "ABFESolventSetupUnit",
    "ABFESolventSimUnit",
    "ABFESolventAnalysisUnit",
]
================================================
FILE: src/openfe/protocols/openmm_afe/abfe_units.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""ABFE Protocol Units --- :mod:`openfe.protocols.openmm_afe.abfe_units`
========================================================================
This module defines the ProtocolUnits for the
:class:`AbsoluteBindingProtocol`.
"""
import logging
import pathlib
from collections.abc import Iterable
import MDAnalysis as mda
import numpy as np
import numpy.typing as npt
from gufe import (
SolventComponent,
)
from gufe.components import Component, SolvatedPDBComponent
from openff.units import Quantity
from openff.units.openmm import to_openmm
from openmm import System
from openmm import unit as ommunit
from openmm.app import Topology as omm_topology
from openmmtools.states import ThermodynamicState
from rdkit import Chem
from openfe.protocols.openmm_afe.equil_afe_settings import (
BoreschRestraintSettings,
SettingsBaseModel,
)
from openfe.protocols.openmm_utils import system_validation
from openfe.protocols.restraint_utils import geometry
from openfe.protocols.restraint_utils.geometry.boresch import BoreschRestraintGeometry
from openfe.protocols.restraint_utils.openmm import omm_restraints
from openfe.protocols.restraint_utils.openmm.omm_restraints import BoreschRestraint
from .base_afe_units import (
BaseAbsoluteMultiStateAnalysisUnit,
BaseAbsoluteMultiStateSimulationUnit,
BaseAbsoluteSetupUnit,
)
logger = logging.getLogger(__name__)
class ComplexComponentsMixin:
    def _get_components(self):
        """
        Gather the components needed for the complex leg.

        Returns
        -------
        alchem_comps : dict[str, Component]
          The alchemical components, as stored in the unit inputs.
        solv_comp : SolventComponent
          The solvent of the system. When the protein component is a
          SolvatedPDBComponent, that component is returned here instead.
        prot_comp : ProteinComponent | None
          The protein component of the system, if it exists.
        off_comps : dict[SmallMoleculeComponent, OFFMolecule]
          Every small molecule of state A mapped to its OpenFF Molecule.
        """
        inputs = self._inputs
        state_a = inputs["stateA"]
        alchemical = inputs["alchemical_components"]

        solvent, protein, ligands = system_validation.get_components(state_a)

        openff_mols = {}
        for ligand in ligands:
            openff_mols[ligand] = ligand.to_openff()

        # Neither the solvent nor the protein needs a ``None`` check here:
        # `validate_solvent` in the Protocol's `_create` will already have
        # raised an error for an invalid system.

        # A SolvatedPDBComponent takes the place of the SolventComponent in
        # the complex, as the SolventComponent is only used in the solvent leg.
        if isinstance(protein, SolvatedPDBComponent):
            solvent = protein

        return alchemical, solvent, protein, openff_mols
class ComplexSettingsMixin:
    def _get_settings(self) -> dict[str, SettingsBaseModel]:
        """
        Collect the settings that apply to the complex leg.

        Leg-specific entries are read from the ``complex_*`` attributes of
        the protocol settings; the remaining ones are shared between legs.

        Returns
        -------
        settings : dict[str, SettingsBaseModel]
          A dictionary with the following entries:
            * forcefield_settings : OpenMMSystemGeneratorFFSettings
            * thermo_settings : ThermoSettings
            * charge_settings : OpenFFPartialChargeSettings
            * solvation_settings : OpenMMSolvationSettings
            * alchemical_settings : AlchemicalSettings
            * lambda_settings : LambdaSettings
            * engine_settings : OpenMMEngineSettings
            * integrator_settings : IntegratorSettings
            * equil_simulation_settings : MDSimulationSettings
            * equil_output_settings : ABFEPreEquilOutputSettings
            * simulation_settings : SimulationSettings
            * output_settings: MultiStateOutputSettings
            * restraint_settings: BaseRestraintSettings
        """
        source = self._inputs["protocol"].settings  # type: ignore[attr-defined]

        return {
            "forcefield_settings": source.forcefield_settings,
            "thermo_settings": source.thermo_settings,
            "charge_settings": source.partial_charge_settings,
            "solvation_settings": source.complex_solvation_settings,
            "alchemical_settings": source.alchemical_settings,
            "lambda_settings": source.complex_lambda_settings,
            "engine_settings": source.engine_settings,
            "integrator_settings": source.complex_integrator_settings,
            "equil_simulation_settings": source.complex_equil_simulation_settings,
            "equil_output_settings": source.complex_equil_output_settings,
            "simulation_settings": source.complex_simulation_settings,
            "output_settings": source.complex_output_settings,
            "restraint_settings": source.restraint_settings,
        }
class ABFEComplexSetupUnit(ComplexComponentsMixin, ComplexSettingsMixin, BaseAbsoluteSetupUnit):
    """
    Setup unit for the complex phase of absolute binding free energy
    transformations.
    """

    simtype = "complex"

    @staticmethod
    def _get_mda_universe(
        topology: omm_topology,
        positions: ommunit.Quantity | None,
        trajectory: pathlib.Path | None,
    ) -> mda.Universe:
        """
        Helper method to get a Universe from an openmm Topology,
        and either an input trajectory or a set of positions.

        Parameters
        ----------
        topology : openmm.app.Topology
          An OpenMM Topology that defines the System.
        positions: openmm.unit.Quantity | None
          The System's current positions.
          Used if a trajectory file is None or is not a file.
        trajectory: pathlib.Path | None
          A Path to a trajectory file to read positions from.

        Returns
        -------
        mda.Universe
          An MDAnalysis Universe of the System.

        Raises
        ------
        ValueError
          If no usable trajectory file exists and ``positions`` is None.
        """
        from MDAnalysis.coordinates.memory import MemoryReader

        # Prefer the trajectory whenever it actually exists on disk
        if trajectory is not None and trajectory.is_file():
            return mda.Universe(
                topology,
                trajectory,
                topology_format="OPENMMTOPOLOGY",
            )

        # Otherwise fall back to the input positions
        if positions is None:
            raise ValueError("No positions to create the Universe with")

        # The coordinates are handed over in angstroms. Convert through the
        # Quantity's public API rather than assuming the stored value is
        # in nanometers.
        return mda.Universe(
            topology,
            np.array(positions.value_in_unit(ommunit.angstrom)),
            topology_format="OPENMMTOPOLOGY",
            trajectory_format=MemoryReader,
        )

    @staticmethod
    def _get_idxs_from_residxs(
        topology: omm_topology,
        residxs: Iterable[int],
    ) -> list[int]:
        """
        Helper method to get a list of the atom indices which belong to a
        list of residues.

        Parameters
        ----------
        topology : openmm.app.Topology
          An OpenMM Topology that defines the System.
        residxs : Iterable[int]
          The indices of the residues whose atom indices should be returned.

        Returns
        -------
        atom_ids : list[int]
          A list of atom indices, in topology order.

        TODO
        ----
        * Check how this works when we deal with virtual sites.
        """
        # Build the lookup once so each residue is a constant-time
        # membership test instead of a scan over ``residxs``.
        wanted = set(residxs)

        atom_ids: list[int] = []
        for residue in topology.residues():
            if residue.index in wanted:
                atom_ids.extend(atom.index for atom in residue.atoms())

        return atom_ids

    @staticmethod
    def _get_boresch_restraint(
        universe: mda.Universe,
        guest_rdmol: Chem.Mol,
        guest_atom_ids: list[int],
        host_atom_ids: list[int],
        temperature: Quantity,
        settings: BoreschRestraintSettings,
    ) -> tuple[BoreschRestraintGeometry, BoreschRestraint]:
        """
        Get a Boresch-like restraint Geometry and OpenMM restraint force
        supplier.

        Parameters
        ----------
        universe : mda.Universe
          An MDAnalysis Universe defining the system to get the restraint for.
        guest_rdmol : Chem.Mol
          An RDKit Molecule defining the guest molecule in the system.
        guest_atom_ids: list[int]
          A list of atom indices defining the guest molecule in the universe.
        host_atom_ids : list[int]
          A list of atom indices defining the host molecules in the universe.
        temperature : openff.units.Quantity
          The temperature of the simulation where the restraint will be added.
        settings : BoreschRestraintSettings
          Settings on how the Boresch-like restraint should be defined.

        Returns
        -------
        geom : BoreschRestraintGeometry
          A class defining the Boresch-like restraint.
        restraint : BoreschRestraint
          A factory class for generating Boresch restraints in OpenMM.
        """
        # Take the minimum of the two possible force constants to check against
        frc_const = min(settings.K_thetaA, settings.K_thetaB)

        geom = geometry.boresch.find_boresch_restraint(
            universe=universe,
            guest_rdmol=guest_rdmol,
            guest_idxs=guest_atom_ids,
            host_idxs=host_atom_ids,
            host_selection=settings.host_selection,
            anchor_finding_strategy=settings.anchor_finding_strategy,
            dssp_filter=settings.dssp_filter,
            rmsf_cutoff=settings.rmsf_cutoff,
            host_min_distance=settings.host_min_distance,
            host_max_distance=settings.host_max_distance,
            angle_force_constant=frc_const,
            temperature=temperature,
        )

        restraint = omm_restraints.BoreschRestraint(settings)
        return geom, restraint

    def _add_restraints(
        self,
        system: System,
        topology: omm_topology,
        positions: ommunit.Quantity,
        alchem_comps: dict[str, list[Component]],
        comp_resids: dict[Component, npt.NDArray],
        settings: dict[str, SettingsBaseModel],
    ) -> tuple[
        Quantity,
        System,
        geometry.HostGuestRestraintGeometry,
    ]:
        """
        Find and add restraints to the OpenMM System.

        Parameters
        ----------
        system : openmm.System
          The System to add the restraint to.
        topology : openmm.app.Topology
          An OpenMM Topology that defines the System.
        positions: openmm.unit.Quantity
          The System's current positions.
          Used if a trajectory file isn't found.
        alchem_comps: dict[str, list[Component]]
          A dictionary with a list of alchemical components
          in both state A and B.
        comp_resids: dict[Component, npt.NDArray]
          A dictionary keyed by each Component in the System
          which contains arrays with the residue indices that are contained
          by that Component.
        settings : dict[str, SettingsBaseModel]
          A dictionary of settings that defines how to find and set
          the restraint.

        Returns
        -------
        correction : openff.units.Quantity
          The standard state correction for the restraint.
        system : openmm.System
          A copy of the System with the restraint added.
        rest_geom : geometry.HostGuestRestraintGeometry
          The restraint Geometry object.

        Raises
        ------
        ValueError
          If no host Component is left once the alchemical and solvent
          Components have been excluded.
        NotImplementedError
          If the restraint settings are not BoreschRestraintSettings.

        Notes
        -----
        Currently, only Boresch-like restraints are supported.
        """
        if self.verbose:
            self.logger.info("Generating restraints")

        guest_comps = alchem_comps["stateA"]

        # Get the guest rdmol
        guest_rdmol = guest_comps[0].to_rdkit()

        # sanitize the rdmol if possible - warn if you can't
        err = Chem.SanitizeMol(guest_rdmol, catchErrors=True)

        if err:
            msg = "restraint generation: could not sanitize ligand rdmol"
            logger.warning(msg)

        # Get the guest idxs
        # concatenate a list of residue indexes for all alchemical components
        residxs = np.concatenate([comp_resids[key] for key in guest_comps])

        # get the alchemical atom ids
        guest_atom_ids = self._get_idxs_from_residxs(topology, residxs)

        # Now get the host idxs
        # We assume this is everything but the alchemical components
        # and the solvent.
        solv_comps = [c for c in comp_resids if isinstance(c, SolventComponent)]
        # ``guest_comps`` is itself a list of Components, so it has to be
        # flattened into the exclusion list; nesting it as a single element
        # would never match a Component and the guest would leak into the host.
        exclude_comps = [*guest_comps, *solv_comps]
        host_resids = [
            resids for comp, resids in comp_resids.items() if comp not in exclude_comps
        ]
        if not host_resids:
            raise ValueError("restraint generation: no host components found")
        residxs = np.concatenate(host_resids)

        host_atom_ids = self._get_idxs_from_residxs(topology, residxs)

        # Finally create an MDAnalysis Universe
        # We try to pass the equilibration production file path through
        # In some cases (debugging / dry runs) this won't be available
        # so we'll default to using input positions.
        univ = self._get_mda_universe(
            topology,
            positions,
            self.shared_basepath / settings["equil_output_settings"].production_trajectory_filename,
        )

        if isinstance(settings["restraint_settings"], BoreschRestraintSettings):
            rest_geom, restraint = self._get_boresch_restraint(
                univ,
                guest_rdmol,
                guest_atom_ids,
                host_atom_ids,
                settings["thermo_settings"].temperature,
                settings["restraint_settings"],
            )
        else:
            # TODO turn this into a direction for different restraint types supported?
            raise NotImplementedError("Other restraint types are not yet available")

        if self.verbose:
            self.logger.info(f"restraint geometry is: {rest_geom}")

        # We need a temporary thermodynamic state to add the restraint
        # & get the correction
        thermodynamic_state = ThermodynamicState(
            system,
            temperature=to_openmm(settings["thermo_settings"].temperature),
            pressure=to_openmm(settings["thermo_settings"].pressure),
        )

        # Add the force to the thermodynamic state
        restraint.add_force(
            thermodynamic_state,
            rest_geom,
            controlling_parameter_name="lambda_restraints",
        )

        # Get the standard state correction as a unit.Quantity
        correction = restraint.get_standard_state_correction(
            thermodynamic_state,
            rest_geom,
        )

        return (
            correction,
            # Remove the thermostat, otherwise you'll get an
            # Andersen thermostat by default!
            thermodynamic_state.get_system(remove_thermostat=True),
            rest_geom,
        )
class ABFEComplexSimUnit(
    ComplexComponentsMixin,
    ComplexSettingsMixin,
    BaseAbsoluteMultiStateSimulationUnit,
):
    """
    Complex-phase multi-state simulation unit for absolute binding free
    energy transformations (e.g. multi replica methods such as Hamiltonian
    replica exchange).
    """

    simtype = "complex"
class ABFEComplexAnalysisUnit(
    ComplexSettingsMixin,
    BaseAbsoluteMultiStateAnalysisUnit,
):
    """
    Complex-phase analysis unit for the multi-state simulations of absolute
    binding free energy transformations.
    """

    simtype = "complex"
class SolventComponentsMixin:
    def _get_components(self):
        """
        Gather the components needed for the solvent leg.

        Returns
        -------
        alchem_comps : dict[str, Component]
          The alchemical components, as stored in the unit inputs.
        solv_comp : SolventComponent
          The SolventComponent of the system
        prot_comp : None
          Always ``None``; no protein is returned for the solvent leg.
        off_comps : dict[SmallMoleculeComponent, OFFMolecule]
          The state A alchemical components mapped to their OpenFF Molecules.
        """
        inputs = self._inputs
        state_a = inputs["stateA"]
        alchemical = inputs["alchemical_components"]

        # Only the solvent from the state is used in this leg
        solvent, _protein, _ligands = system_validation.get_components(state_a)

        openff_mols = {}
        for comp in alchemical["stateA"]:
            openff_mols[comp] = comp.to_openff()

        # The solvent needs no ``None`` check: `validate_solvent` in the
        # Protocol's `_create` will already have raised an error otherwise.
        # The protein is never checked, ``None`` is returned in its place.
        return alchemical, solvent, None, openff_mols
class SolventSettingsMixin:
    def _get_settings(self) -> dict[str, SettingsBaseModel]:
        """
        Collect the settings that apply to the solvent leg.

        Leg-specific entries are read from the ``solvent_*`` attributes of
        the protocol settings; the remaining ones are shared between legs.

        Returns
        -------
        settings : dict[str, SettingsBaseModel]
          A dictionary with the following entries:
            * forcefield_settings : OpenMMSystemGeneratorFFSettings
            * thermo_settings : ThermoSettings
            * charge_settings : OpenFFPartialChargeSettings
            * solvation_settings : OpenMMSolvationSettings
            * alchemical_settings : AlchemicalSettings
            * lambda_settings : LambdaSettings
            * engine_settings : OpenMMEngineSettings
            * integrator_settings : IntegratorSettings
            * equil_simulation_settings : MDSimulationSettings
            * equil_output_settings : ABFEPreEquilOutputSettings
            * simulation_settings : MultiStateSimulationSettings
            * output_settings: MultiStateOutputSettings
        """
        source = self._inputs["protocol"].settings  # type: ignore[attr-defined]

        return {
            "forcefield_settings": source.forcefield_settings,
            "thermo_settings": source.thermo_settings,
            "charge_settings": source.partial_charge_settings,
            "solvation_settings": source.solvent_solvation_settings,
            "alchemical_settings": source.alchemical_settings,
            "lambda_settings": source.solvent_lambda_settings,
            "engine_settings": source.engine_settings,
            "integrator_settings": source.solvent_integrator_settings,
            "equil_simulation_settings": source.solvent_equil_simulation_settings,
            "equil_output_settings": source.solvent_equil_output_settings,
            "simulation_settings": source.solvent_simulation_settings,
            "output_settings": source.solvent_output_settings,
        }
class ABFESolventSetupUnit(
    SolventComponentsMixin,
    SolventSettingsMixin,
    BaseAbsoluteSetupUnit,
):
    """
    Solvent-phase setup unit for absolute binding free energy
    transformations.
    """

    simtype = "solvent"
class ABFESolventSimUnit(
    SolventComponentsMixin,
    SolventSettingsMixin,
    BaseAbsoluteMultiStateSimulationUnit,
):
    """
    Solvent-phase multi-state simulation unit for absolute binding free
    energy transformations (e.g. multi replica methods such as Hamiltonian
    replica exchange).
    """

    simtype = "solvent"
class ABFESolventAnalysisUnit(
    SolventSettingsMixin,
    BaseAbsoluteMultiStateAnalysisUnit,
):
    """
    Solvent-phase analysis unit for the multi-state simulations of absolute
    binding free energy transformations.
    """

    simtype = "solvent"
================================================
FILE: src/openfe/protocols/openmm_afe/afe_protocol_results.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""
Result classes for the Absolute Free Energy Protocols
=====================================================
This module implements :class:`gufe.ProtocolResult` classes for the absolute
free energy Protocols.
Specifically it implements:
* AbsoluteBindingProtocolResult
* AbsoluteSolvationProtocolResult
"""
import itertools
import logging
import pathlib
import warnings
from typing import Optional, Union
import gufe
import numpy as np
import numpy.typing as npt
from openff.units import Quantity
from openff.units import unit as offunit
from openmmtools import multistate
from openfe.protocols.restraint_utils.geometry.boresch import BoreschRestraintGeometry
logger = logging.getLogger(__name__)
class AbsoluteProtocolResultMixin:
    """
    Accessors shared by the absolute free energy ProtocolResult classes.

    The two legs of the thermodynamic cycle are named by the ``bound_state``
    and ``unbound_state`` class attributes. Every accessor reads the outputs
    of the first unit result (``pus[0]``) of each repeat stored under
    ``self.data[<leg>]``.
    """

    bound_state = "solvent"
    unbound_state = "vacuum"

    # NOTE(review): the concrete result classes in this module list
    # ``gufe.ProtocolResult`` before this mixin, so this ``__init__`` only
    # runs if that base's ``__init__`` cooperatively calls
    # ``super().__init__`` -- confirm.
    def __init__(self, **data):
        super().__init__(**data)
        # TODO: Detect when we have extensions and stitch these together?
        unit_lists = itertools.chain(
            self.data[self.bound_state].values(),
            self.data[self.unbound_state].values(),
        )
        if any(len(pur_list) > 2 for pur_list in unit_lists):
            raise NotImplementedError("Can't stitch together results yet")

    def _first_unit_outputs(self, output_key: str) -> dict[str, list]:
        """
        Collect one output entry from the first unit result of every repeat.

        Parameters
        ----------
        output_key : str
          The key to read from each unit result's ``outputs``.

        Returns
        -------
        dict[str, list]
          A dictionary keyed by leg name (bound leg first), with the
          requested output of each repeat of that leg.

        Raises
        ------
        KeyError
          If a leg is missing from ``self.data`` or ``output_key`` is missing
          from a unit result's outputs.
        """
        return {
            leg: [
                pus[0].outputs[output_key]
                for pus in self.data[leg].values()  # type: ignore[attr-defined]
            ]
            for leg in (self.bound_state, self.unbound_state)
        }

    def get_forward_and_reverse_energy_analysis(
        self,
    ) -> dict[str, list[Optional[dict[str, Union[npt.NDArray, Quantity]]]]]:
        """
        Get the reverse and forward analysis of the free energies.

        Returns
        -------
        forward_reverse : dict[str, list[Optional[dict[str, Union[npt.NDArray, openff.units.Quantity]]]]]
          A dictionary, keyed for each leg of the thermodynamic cycle,
          either ``solvent`` and ``vacuum`` for a solvation free energy or
          ``solvent`` and ``complex`` for a binding free energy,
          with each containing a list of dictionaries containing the forward
          and reverse analysis of each repeat of that simulation type.

          The forward and reverse analysis dictionaries contain:
            - `fractions`: npt.NDArray
                The fractions of data used for the estimates
            - `forward_DGs`, `reverse_DGs`: openff.units.Quantity
                The forward and reverse estimates for each fraction of data
            - `forward_dDGs`, `reverse_dDGs`: openff.units.Quantity
                The forward and reverse estimate uncertainty for each
                fraction of data.

          If one of the cycle leg list entries is ``None``, this indicates
          that the analysis could not be carried out for that repeat. This
          is most likely caused by MBAR convergence issues when attempting to
          calculate free energies from too few samples.

        Warns
        -----
        UserWarning
          * If any of the forward and reverse dictionaries are ``None`` in a
            given thermodynamic cycle leg.
        """
        forward_reverse: dict[str, list[Optional[dict[str, Union[npt.NDArray, Quantity]]]]] = (
            self._first_unit_outputs("forward_and_reverse_energies")
        )

        for key, entries in forward_reverse.items():
            if any(entry is None for entry in entries):
                wmsg = (
                    "One or more ``None`` entries were found in the forward "
                    f"and reverse dictionaries of the repeats of the {key} "
                    "calculations. This is likely caused by an MBAR convergence "
                    "failure caused by too few independent samples when "
                    "calculating the free energies of the 10% timeseries slice."
                )
                warnings.warn(wmsg)

        return forward_reverse

    def get_overlap_matrices(self) -> dict[str, list[dict[str, npt.NDArray]]]:
        """
        Get the MBAR overlap estimates for all legs of the simulation.

        Returns
        -------
        overlap_stats : dict[str, list[dict[str, npt.NDArray]]]
          A dictionary keyed for each leg of the thermodynamic cycle, either
          ``solvent`` and ``vacuum`` for a solvation free energy or
          ``solvent`` and ``complex`` for a binding free energy,
          with each containing a list of dictionaries with the MBAR overlap
          estimates of each repeat of that simulation type.

          The underlying MBAR dictionaries contain the following keys:
            * ``scalar``: One minus the largest nontrivial eigenvalue
            * ``eigenvalues``: The sorted (descending) eigenvalues of the
              overlap matrix
            * ``matrix``: Estimated overlap matrix of observing a sample from
              state i in state j
        """
        return self._first_unit_outputs("unit_mbar_overlap")

    def get_replica_transition_statistics(self) -> dict[str, list[dict[str, npt.NDArray]]]:
        """
        Get the replica exchange transition statistics for all
        legs of the simulation.

        Note
        ----
        This is currently only available in cases where a replica exchange
        simulation was run.

        Returns
        -------
        repex_stats : dict[str, list[dict[str, npt.NDArray]]]
          A dictionary with keys for each leg of the thermodynamic cycle, either
          ``solvent`` and ``vacuum`` for a solvation free energy or
          ``solvent`` and ``complex`` for a binding free energy,
          with each containing a list of dictionaries containing the replica
          transition statistics for each repeat of that simulation type.

          The replica transition statistics dictionaries contain the following:
            * ``eigenvalues``: The sorted (descending) eigenvalues of the
              lambda state transition matrix
            * ``matrix``: The transition matrix estimate of a replica switching
              from state i to state j.

        Raises
        ------
        ValueError
          If the replica exchange statistics could not be found in the
          unit outputs.
        """
        try:
            return self._first_unit_outputs("replica_exchange_statistics")
        except KeyError as err:
            errmsg = "Replica exchange statistics were not found, did you run a repex calculation?"
            # Keep the original lookup failure attached as the explicit cause
            raise ValueError(errmsg) from err

    def get_replica_states(self) -> dict[str, list[npt.NDArray]]:
        """
        Get the timeseries of replica states for all simulation legs.

        Returns
        -------
        replica_states : dict[str, list[npt.NDArray]]
          Dictionary keyed for each leg of the thermodynamic cycle, either
          `solvent` and `vacuum` for solvation free energies,
          or `complex` and `solvent` for binding free energies,
          with lists of replica states timeseries for each repeat of that
          simulation type.

        Raises
        ------
        ValueError
          If a trajectory or checkpoint file cannot be found on disk.
        """

        def _existing_path(filename) -> pathlib.Path:
            # Turn the input into a Path, refusing files that do not exist
            path = pathlib.Path(filename)
            if not path.exists():
                errmsg = f"File could not be found {path}"
                raise ValueError(errmsg)
            return path

        def _read_replica_states(nc, chk):
            nc_path = _existing_path(nc)
            # The checkpoint is looked up next to the trajectory file and
            # handed to the reporter by bare file name.
            chk_name = _existing_path(nc_path.parents[0] / chk).name
            reporter = multistate.MultiStateReporter(
                storage=nc_path, checkpoint_storage=chk_name, open_mode="r"
            )
            try:
                return np.asarray(reporter.read_replica_thermodynamic_states())
            finally:
                # Always release the reporter, even if the read fails
                reporter.close()

        replica_states: dict[str, list[npt.NDArray]] = {
            self.bound_state: [],
            self.unbound_state: [],
        }

        for key in [self.bound_state, self.unbound_state]:
            for pus in self.data[key].values():  # type: ignore[attr-defined]
                states = _read_replica_states(
                    pus[0].outputs["trajectory"],
                    pus[0].outputs["checkpoint"],
                )
                replica_states[key].append(states)

        return replica_states

    def equilibration_iterations(self) -> dict[str, list[float]]:
        """
        Get the number of equilibration iterations for each simulation.

        Returns
        -------
        equilibration_lengths : dict[str, list[float]]
          Dictionary keyed for each leg of the thermodynamic cycle, either
          `solvent` and `vacuum` for solvation free energies,
          or `complex` and `solvent` for binding free energies,
          with lists containing the number of equilibration iterations for
          each repeat of that simulation type.
        """
        return self._first_unit_outputs("equilibration_iterations")

    def production_iterations(self) -> dict[str, list[float]]:
        """
        Get the number of production iterations for each simulation.
        Returns the number of uncorrelated production samples for each
        repeat of the calculation.

        Returns
        -------
        production_lengths : dict[str, list[float]]
          Dictionary keyed for each leg of the thermodynamic cycle, either
          `solvent` and `vacuum` for solvation free energies,
          or `complex` and `solvent` for binding free energies,
          with lists containing the number of production iterations for
          each repeat of that simulation type.
        """
        return self._first_unit_outputs("production_iterations")

    def selection_indices(self) -> dict[str, list[Optional[npt.NDArray]]]:
        """
        Get the system selection indices used to write PDB and
        trajectory files.

        Returns
        -------
        indices : dict[str, list[npt.NDArray]]
          A dictionary keyed for each state, either
          `solvent` and `vacuum` for solvation free energies,
          or `complex` and `solvent` for binding free energies,
          each containing a list of NDArrays containing the corresponding
          full system atom indices for each atom written in the production
          trajectory files for each replica.
        """
        return self._first_unit_outputs("selection_indices")
class AbsoluteSolvationProtocolResult(gufe.ProtocolResult, AbsoluteProtocolResultMixin):
    """
    Results gathered from running an AbsoluteSolvationProtocol.
    """

    bound_state = "solvent"
    unbound_state = "vacuum"

    def get_individual_estimates(self) -> dict[str, list[tuple[Quantity, Quantity]]]:
        """
        Get the per-repeat free energy estimates of each leg.

        Returns
        -------
        dGs : dict[str, list[tuple[openff.units.Quantity, openff.units.Quantity]]]
          A dictionary, keyed `solvent` and `vacuum` for each leg
          of the thermodynamic cycle, with lists of tuples containing
          the individual free energy estimates and associated MBAR
          uncertainties for each repeat of that simulation type.
        """
        return {
            leg: [
                (pus[0].outputs["unit_estimate"], pus[0].outputs["unit_estimate_error"])
                for pus in self.data[leg].values()
            ]
            for leg in (self.bound_state, self.unbound_state)
        }

    def get_estimate(self):
        """Get the solvation free energy estimate for this calculation.

        Returns
        -------
        dG : openff.units.Quantity
          The solvation free energy. This is a Quantity defined with units.
        """

        def _leg_mean(leg_estimates):
            # Everything is expressed in the unit of the first estimate
            ref_unit = leg_estimates[0][0].u
            magnitudes = [entry[0].to(ref_unit).m for entry in leg_estimates]
            return np.average(magnitudes) * ref_unit

        per_leg = self.get_individual_estimates()
        vacuum_mean = _leg_mean(per_leg["vacuum"])
        solvent_mean = _leg_mean(per_leg["solvent"])

        return vacuum_mean - solvent_mean

    def get_uncertainty(self):
        """Get the solvation free energy error for this calculation.

        Returns
        -------
        err : openff.units.Quantity
          The standard deviation between estimates of the solvation free
          energy. This is a Quantity defined with units.
        """

        def _leg_spread(leg_estimates):
            # Everything is expressed in the unit of the first estimate
            ref_unit = leg_estimates[0][0].u
            magnitudes = [entry[0].to(ref_unit).m for entry in leg_estimates]
            return np.std(magnitudes) * ref_unit

        per_leg = self.get_individual_estimates()
        vacuum_spread = _leg_spread(per_leg["vacuum"])
        solvent_spread = _leg_spread(per_leg["solvent"])

        # The two legs are combined in quadrature
        return np.sqrt(vacuum_spread**2 + solvent_spread**2)
class AbsoluteBindingProtocolResult(gufe.ProtocolResult, AbsoluteProtocolResultMixin):
"""
Protocol results with the output of a AbsoluteBindingProtocol.
"""
bound_state = "complex"
unbound_state = "solvent"
def get_individual_estimates(
    self,
) -> dict[str, list[tuple[Quantity, Quantity]]]:
    """
    Get the per-repeat free energy estimates of each part of the cycle.

    Returns
    -------
    dGs : dict[str, list[tuple[openff.units.Quantity, openff.units.Quantity]]]
      A dictionary, keyed `solvent`, `complex`, and
      `standard_state_correction`, representing each portion of the
      thermodynamic cycle, with lists of tuples containing the individual
      free energy estimates and, for `solvent` and `complex`, the
      associated MBAR uncertainties for each repeat of that simulation type.

    Notes
    -----
    * Standard state correction has no error and so will return a value
      of 0.
    """

    def _estimate_and_error(unit_result):
        outputs = unit_result.outputs
        return (outputs["unit_estimate"], outputs["unit_estimate_error"])

    complex_dGs = []
    correction_dGs = []
    for pus in self.data["complex"].values():
        first = pus[0]
        complex_dGs.append(_estimate_and_error(first))
        # correction has no error
        zero_error = 0 * offunit.kilocalorie_per_mole
        correction_dGs.append((first.outputs["standard_state_correction"], zero_error))

    solv_dGs = [_estimate_and_error(pus[0]) for pus in self.data["solvent"].values()]

    return {
        "solvent": solv_dGs,
        "complex": complex_dGs,
        "standard_state_correction": correction_dGs,
    }
@staticmethod
def _add_complex_standard_state_corr(
complex_dG: list[tuple[Quantity, Quantity]],
standard_state_dG: list[tuple[Quantity, Quantity]],
) -> list[tuple[Quantity, Quantity]]:
"""
Helper method to combine the
complex & standard state corrections legs.
Parameters
----------
complex_dG : list[tuple[openff.units.Quantity, openff.units.Quantity]]
The individual estimates of the complex leg,
where the first entry of each tuple is the dG estimate
and the second entry is the MBAR error.
standard_state_dG : list[tuple[Quantity, Quantity]]
The individual standard state corrections for each corresponding
complex leg. The first entry is the correction, the second
is an empty error value of 0.
Returns
-------
combined_dG : list[tuple[openff.units.Quantity,openff.units. Quantity]]
A list of dG estimates & MBAR errors for the combined
complex & standard state correction of each repeat.
Notes
-----
We assume that both list of items are in the right order.
"""
combined_dG: list[tuple[Quantity, Quantity]] = []
for comp, corr in zip(complex_dG, standard_state_dG):
# No need to convert unit types, since pint takes care of that
# except that mypy hates it because pint isn't typed properly...
# No need to add errors since there's just the one
combined_dG.append((comp[0] + corr[0], comp[1])) # type: ignore[operator]
return combined_dG
def get_estimate(self) -> Quantity:
"""Get the binding free energy estimate for this calculation.
Returns
-------
dG : openff.units.Quantity
The binding free energy. This is a Quantity defined with units.
"""
def _get_average(estimates):
# Get the unit value of the first value in the estimates
u = estimates[0][0].u
# Loop through estimates and get the free energy values
# in the unit of the first estimate
dGs = [i[0].to(u).m for i in estimates]
return np.average(dGs) * u
individual_estimates = self.get_individual_estimates()
complex_dG = _get_average(
self._add_complex_standard_state_corr(
individual_estimates["complex"],
individual_estimates["standard_state_correction"],
)
)
solv_dG = _get_average(individual_estimates["solvent"])
return -complex_dG + solv_dG
def get_uncertainty(self) -> Quantity:
"""Get the binding free energy error for this calculation.
Returns
-------
err : openff.units.Quantity
The standard deviation between estimates of the binding free
energy. This is a Quantity defined with units.
"""
def _get_stdev(estimates):
# Get the unit value of the first value in the estimates
u = estimates[0][0].u
# Loop through estimates and get the free energy values
# in the unit of the first estimate
dGs = [i[0].to(u).m for i in estimates]
return np.std(dGs) * u
individual_estimates = self.get_individual_estimates()
complex_err = _get_stdev(
self._add_complex_standard_state_corr(
individual_estimates["complex"],
individual_estimates["standard_state_correction"],
)
)
solv_err = _get_stdev(individual_estimates["solvent"])
# return the combined error
return np.sqrt(complex_err**2 + solv_err**2)
def restraint_geometries(self) -> list[BoreschRestraintGeometry]:
"""
Get a list of the restraint geometries for the
complex simulations. These define the atoms that have
been restrained in the system.
Returns
-------
geometries : list[dict[str, Any]]
A list of dictionaries containing the details of the atoms
in the system that are involved in the restraint.
"""
geometries = [
BoreschRestraintGeometry.model_validate(pus[0].outputs["restraint_geometry"])
for pus in self.data["complex"].values()
]
return geometries
================================================
FILE: src/openfe/protocols/openmm_afe/ahfe_units.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""
AHFE Protocol Units --- :mod:`openfe.protocols.openmm_afe.ahfe_units`
=====================================================================
This module defines the ProtocolUnits for the
:class:`AbsoluteSolvationProtocol`.
"""
import logging
from openfe.protocols.openmm_afe.equil_afe_settings import (
SettingsBaseModel,
)
from ..openmm_utils import system_validation
from .base_afe_units import (
BaseAbsoluteMultiStateAnalysisUnit,
BaseAbsoluteMultiStateSimulationUnit,
BaseAbsoluteSetupUnit,
)
logger = logging.getLogger(__name__)
class VacuumComponentsMixin:
    def _get_components(self):
        """
        Collect the components used to build the vacuum (gas phase) system.

        Returns
        -------
        alchem_comps : dict[str, list[Component]]
            The alchemical components, exactly as stored under the
            ``alchemical_components`` input of the unit.
        solv_comp : None
            Always ``None``: the solvent of the input state is dropped
            since this is the gas phase unit.
        prot_comp : Optional[ProteinComponent]
            The protein component reported for ``stateA`` by
            ``system_validation.get_components``.
        small_mols : dict[Component, OpenFF Molecule]
            The OpenFF Molecules to add to the system, keyed by component.
            Only the ``stateA`` alchemical components are included.
        """
        state_a = self._inputs["stateA"]
        alchemical = self._inputs["alchemical_components"]

        # Only the alchemical species of stateA end up in the gas phase
        # system (a single disappearing ligand is enforced elsewhere)
        offmols = {}
        for component in alchemical["stateA"]:
            offmols[component] = component.to_openff()

        # The solvent and small molecule entries of the state are not needed
        _solvent, protein, _ligands = system_validation.get_components(state_a)

        return alchemical, None, protein, offmols
class VacuumSettingsMixin:
    def _get_settings(self) -> dict[str, SettingsBaseModel]:
        """
        Pick the settings that apply to a vacuum transformation.

        Returns
        -------
        settings : dict[str, SettingsBaseModel]
            A dictionary with the following entries, each read from the
            protocol settings attribute given in parentheses:

            * forcefield_settings (``vacuum_forcefield_settings``)
            * thermo_settings (``thermo_settings``)
            * charge_settings (``partial_charge_settings``)
            * solvation_settings (``solvation_settings``)
            * alchemical_settings (``alchemical_settings``)
            * lambda_settings (``lambda_settings``)
            * engine_settings (``vacuum_engine_settings``)
            * integrator_settings (``integrator_settings``)
            * equil_simulation_settings (``vacuum_equil_simulation_settings``)
            * equil_output_settings (``vacuum_equil_output_settings``)
            * simulation_settings (``vacuum_simulation_settings``)
            * output_settings (``vacuum_output_settings``)
        """
        protocol_settings = self._inputs["protocol"].settings  # type: ignore[attr-defined]

        # Phase-specific entries come from the ``vacuum_*`` attributes, the
        # remaining ones are shared between phases
        return {
            "forcefield_settings": protocol_settings.vacuum_forcefield_settings,
            "thermo_settings": protocol_settings.thermo_settings,
            "charge_settings": protocol_settings.partial_charge_settings,
            "solvation_settings": protocol_settings.solvation_settings,
            "alchemical_settings": protocol_settings.alchemical_settings,
            "lambda_settings": protocol_settings.lambda_settings,
            "engine_settings": protocol_settings.vacuum_engine_settings,
            "integrator_settings": protocol_settings.integrator_settings,
            "equil_simulation_settings": protocol_settings.vacuum_equil_simulation_settings,
            "equil_output_settings": protocol_settings.vacuum_equil_output_settings,
            "simulation_settings": protocol_settings.vacuum_simulation_settings,
            "output_settings": protocol_settings.vacuum_output_settings,
        }
class AHFEVacuumSetupUnit(VacuumComponentsMixin, VacuumSettingsMixin, BaseAbsoluteSetupUnit):
    """
    Setup unit for the vacuum phase of absolute hydration free energy
    transformations.

    Component selection is provided by ``VacuumComponentsMixin`` and
    settings selection by ``VacuumSettingsMixin``; everything else is
    inherited from ``BaseAbsoluteSetupUnit``.
    """

    # Label of the phase handled by this unit
    simtype = "vacuum"
class AHFEVacuumSimUnit(
    VacuumComponentsMixin, VacuumSettingsMixin, BaseAbsoluteMultiStateSimulationUnit
):
    """
    Multi-state simulation (e.g. multi replica methods like Hamiltonian
    replica exchange) unit for the vacuum phase of absolute hydration
    free energy transformations.

    Component selection is provided by ``VacuumComponentsMixin`` and
    settings selection by ``VacuumSettingsMixin``; everything else is
    inherited from ``BaseAbsoluteMultiStateSimulationUnit``.
    """

    # Label of the phase handled by this unit
    simtype = "vacuum"
class AHFEVacuumAnalysisUnit(VacuumSettingsMixin, BaseAbsoluteMultiStateAnalysisUnit):
    """
    Analysis unit for multi-state simulations with the vacuum phase
    of absolute hydration free energy transformations.

    Settings selection is provided by ``VacuumSettingsMixin``; everything
    else is inherited from ``BaseAbsoluteMultiStateAnalysisUnit``. Unlike
    the setup and simulation units, no components mixin is used.
    """

    # Label of the phase handled by this unit
    simtype = "vacuum"
class SolventComponentsMixin:
    def _get_components(self):
        """
        Collect the components used to build the solvent phase system.

        Returns
        -------
        alchem_comps : dict[str, list[Component]]
            The alchemical components, exactly as stored under the
            ``alchemical_components`` input of the unit.
        solv_comp : SolventComponent
            The solvent component reported for ``stateA`` by
            ``system_validation.get_components``.
        prot_comp : Optional[ProteinComponent]
            The protein component reported for ``stateA``.
        small_mols : dict[SmallMoleculeComponent, OFFMolecule]
            OpenFF Molecules for every small molecule of ``stateA``, keyed
            by their SmallMoleculeComponent.
        """
        state_a = self._inputs["stateA"]
        alchemical = self._inputs["alchemical_components"]

        solvent, protein, ligands = system_validation.get_components(state_a)

        offmols = {}
        for ligand in ligands:
            offmols[ligand] = ligand.to_openff()

        # Neither the solvent nor the protein component is re-checked here:
        # invalid combinations are expected to have already been rejected
        # when the Protocol validated its inputs in `_create`.
        return alchemical, solvent, protein, offmols
class SolventSettingsMixin:
    def _get_settings(self) -> dict[str, SettingsBaseModel]:
        """
        Pick the settings that apply to a solvent transformation.

        Returns
        -------
        settings : dict[str, SettingsBaseModel]
            A dictionary with the following entries, each read from the
            protocol settings attribute given in parentheses:

            * forcefield_settings (``solvent_forcefield_settings``)
            * thermo_settings (``thermo_settings``)
            * charge_settings (``partial_charge_settings``)
            * solvation_settings (``solvation_settings``)
            * alchemical_settings (``alchemical_settings``)
            * lambda_settings (``lambda_settings``)
            * engine_settings (``solvent_engine_settings``)
            * integrator_settings (``integrator_settings``)
            * equil_simulation_settings (``solvent_equil_simulation_settings``)
            * equil_output_settings (``solvent_equil_output_settings``)
            * simulation_settings (``solvent_simulation_settings``)
            * output_settings (``solvent_output_settings``)
        """
        protocol_settings = self._inputs["protocol"].settings  # type: ignore[attr-defined]

        # Phase-specific entries come from the ``solvent_*`` attributes, the
        # remaining ones are shared between phases
        return {
            "forcefield_settings": protocol_settings.solvent_forcefield_settings,
            "thermo_settings": protocol_settings.thermo_settings,
            "charge_settings": protocol_settings.partial_charge_settings,
            "solvation_settings": protocol_settings.solvation_settings,
            "alchemical_settings": protocol_settings.alchemical_settings,
            "lambda_settings": protocol_settings.lambda_settings,
            "engine_settings": protocol_settings.solvent_engine_settings,
            "integrator_settings": protocol_settings.integrator_settings,
            "equil_simulation_settings": protocol_settings.solvent_equil_simulation_settings,
            "equil_output_settings": protocol_settings.solvent_equil_output_settings,
            "simulation_settings": protocol_settings.solvent_simulation_settings,
            "output_settings": protocol_settings.solvent_output_settings,
        }
class AHFESolventSetupUnit(SolventComponentsMixin, SolventSettingsMixin, BaseAbsoluteSetupUnit):
    """
    Setup unit for the solvent phase of absolute hydration free energy
    transformations.

    Component selection is provided by ``SolventComponentsMixin`` and
    settings selection by ``SolventSettingsMixin``; everything else is
    inherited from ``BaseAbsoluteSetupUnit``.
    """

    # Label of the phase handled by this unit
    simtype = "solvent"
class AHFESolventSimUnit(
    SolventComponentsMixin, SolventSettingsMixin, BaseAbsoluteMultiStateSimulationUnit
):
    """
    Multi-state simulation (e.g. multi replica methods like Hamiltonian
    replica exchange) unit for the solvent phase of absolute hydration
    free energy transformations.

    Component selection is provided by ``SolventComponentsMixin`` and
    settings selection by ``SolventSettingsMixin``; everything else is
    inherited from ``BaseAbsoluteMultiStateSimulationUnit``.
    """

    # Label of the phase handled by this unit
    simtype = "solvent"
class AHFESolventAnalysisUnit(SolventSettingsMixin, BaseAbsoluteMultiStateAnalysisUnit):
    """
    Analysis unit for multi-state simulations with the solvent phase
    of absolute hydration free energy transformations.

    Settings selection is provided by ``SolventSettingsMixin``; everything
    else is inherited from ``BaseAbsoluteMultiStateAnalysisUnit``. Unlike
    the setup and simulation units, no components mixin is used.
    """

    # Label of the phase handled by this unit
    simtype = "solvent"
================================================
FILE: src/openfe/protocols/openmm_afe/base_afe_units.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""OpenMM AFE Protocol base classes
===================================
Base classes for the OpenMM absolute free energy ProtocolUnits.
This mostly implements BaseAbsoluteUnit whose methods can be
overridden to define different types of alchemical transformations.
TODO
----
* Add in all the AlchemicalFactory and AlchemicalRegion kwargs
as settings.
* Allow for a more flexible setting of Lambda regions.
"""
import abc
import copy
import logging
import os
import pathlib
from typing import Any
import gufe
import numpy as np
import numpy.typing as npt
import openmm
import openmmtools
from gufe import (
BaseSolventComponent,
ProteinComponent,
SmallMoleculeComponent,
SolventComponent,
)
from gufe.components import Component
from gufe.protocols.errors import ProtocolUnitExecutionError
from openff.toolkit.topology import Molecule as OFFMolecule
from openff.units import Quantity
from openff.units import unit as offunit
from openff.units.openmm import ensure_quantity, from_openmm, to_openmm
from openmm import app
from openmm import unit as ommunit
from openmmforcefields.generators import SystemGenerator
from openmmtools import multistate
from openmmtools.alchemy import (
AbsoluteAlchemicalFactory,
AlchemicalRegion,
AlchemicalState,
)
from openmmtools.states import (
GlobalParameterState,
SamplerState,
ThermodynamicState,
create_thermodynamic_state_protocol,
)
import openfe
from openfe.protocols.openmm_afe.equil_afe_settings import (
AlchemicalSettings,
BaseSolvationSettings,
IntegratorSettings,
MultiStateOutputSettings,
MultiStateSimulationSettings,
OpenFFPartialChargeSettings,
ThermoSettings,
)
from openfe.protocols.openmm_md.plain_md_methods import PlainMDSimulationUnit
from openfe.protocols.openmm_utils import (
charge_generation,
multistate_analysis,
omm_compute,
settings_validation,
system_creation,
system_validation,
)
from openfe.protocols.openmm_utils.mdtraj_utils import (
mdtraj_from_openmm,
)
from openfe.protocols.openmm_utils.omm_settings import (
SettingsBaseModel,
)
from openfe.protocols.openmm_utils.serialization import (
deserialize,
make_vec3_box,
serialize,
)
from openfe.protocols.restraint_utils import geometry
from openfe.protocols.restraint_utils.openmm import omm_restraints
from openfe.utils import log_system_probe, without_oechem_backend
logger = logging.getLogger(__name__)
class AbsoluteUnitMixin:
    """
    Helpers shared by the absolute free energy ProtocolUnits: base path
    handling, settings extraction and an execution environment check.
    """

    def _prepare(
        self,
        verbose: bool,
        scratch_basepath: pathlib.Path | None,
        shared_basepath: pathlib.Path | None,
    ):
        """
        Store the verbosity flag and the base paths on the unit.

        Parameters
        ----------
        verbose : bool
            Verbose output of the simulation progress. Output is provided via
            INFO level logging.
        scratch_basepath : pathlib.Path | None
            Optional base path to write scratch files to. ``None`` is
            replaced by the current working directory (``Path(".")``).
        shared_basepath : pathlib.Path | None
            Optional base path to write shared files to. ``None`` is
            replaced by the current working directory (``Path(".")``).
        """
        self.verbose = verbose

        # set basepaths
        def _set_optional_path(basepath):
            if basepath is None:
                return pathlib.Path(".")
            return basepath

        self.scratch_basepath = _set_optional_path(scratch_basepath)
        self.shared_basepath = _set_optional_path(shared_basepath)

    @abc.abstractmethod
    def _get_settings(self) -> dict[str, SettingsBaseModel]:
        """
        Get a dictionary with the following entries:
          * forcefield_settings : OpenMMSystemGeneratorFFSettings
          * thermo_settings : ThermoSettings
          * solvation_settings : BaseSolvationSettings
          * alchemical_settings : AlchemicalSettings
          * lambda_settings : LambdaSettings
          * engine_settings : OpenMMEngineSettings
          * integrator_settings : IntegratorSettings
          * equil_simulation_settings : MDSimulationSettings
          * equil_output_settings : MDOutputSettings
          * simulation_settings : MultiStateSimulationSettings
          * output_settings : MultiStateOutputSettings

        Settings may change depending on what type of simulation you are
        running. Cherry pick them and return them to be available later on.

        This method should also add various validation checks as necessary.

        Note
        ----
        Must be implemented in the child class.
        """
        ...

    @staticmethod
    def _verify_execution_environment(
        setup_outputs: dict[str, Any],
    ) -> None:
        """
        Check that the Python environment hasn't changed based on the
        relevant Python library versions stored in the setup outputs.

        Parameters
        ----------
        setup_outputs : dict[str, Any]
            Outputs of the setup unit. The ``gufe_version``,
            ``openfe_version`` and ``openmm_version`` entries are compared
            against the versions of the currently imported packages.

        Raises
        ------
        ProtocolUnitExecutionError
            If any of the compared versions differs, or if a version entry
            that needs to be looked up is missing from ``setup_outputs``.
        """
        try:
            if (
                (gufe.__version__ != setup_outputs["gufe_version"])
                or (openfe.__version__ != setup_outputs["openfe_version"])
                or (openmm.__version__ != setup_outputs["openmm_version"])
            ):
                errmsg = "Python environment has changed, cannot continue Protocol execution."
                raise ProtocolUnitExecutionError(errmsg)
        except KeyError as err:
            errmsg = "Missing environment information from setup outputs."
            # Chain the KeyError explicitly so that the traceback names the
            # missing entry as the direct cause of the failure
            raise ProtocolUnitExecutionError(errmsg) from err
class BaseAbsoluteSetupUnit(gufe.ProtocolUnit, AbsoluteUnitMixin):
"""
Base class for setting up an absolute free energy transformations.
"""
@abc.abstractmethod
def _get_components(
    self,
) -> tuple[
    dict[str, list[Component]],
    gufe.SolventComponent | None,
    gufe.ProteinComponent | None,
    dict[SmallMoleculeComponent, OFFMolecule],
]:
    """
    Get the relevant components to create the alchemical system with.

    Returns
    -------
    alchem_comps : dict[str, list[Component]]
        The alchemical components, keyed by end state.
    solv_comp : gufe.SolventComponent | None
        The solvent component, if there is one.
    prot_comp : gufe.ProteinComponent | None
        The protein component, if there is one.
    small_mols : dict[SmallMoleculeComponent, OFFMolecule]
        The OpenFF Molecules to add to the system, keyed by their
        SmallMoleculeComponent.

    Note
    ----
    Must be implemented in the child class.
    """
    ...
@staticmethod
def _get_alchemical_indices(
omm_top: openmm.app.Topology,
comp_resids: dict[Component, npt.NDArray],
alchem_comps: dict[str, list[Component]],
) -> list[int]:
"""
Get a list of atom indices for all the alchemical species
Parameters
----------
omm_top : openmm.Topology
Topology of OpenMM System.
comp_resids : dict[Component, npt.NDArray]
A dictionary of residues for each component in the System.
alchem_comps : dict[str, list[Component]]
A dictionary of alchemical components for each end state.
Return
------
atom_ids : list[int]
A list of atom indices for the alchemical species
"""
# concatenate a list of residue indexes for all alchemical components
residxs = np.concatenate([comp_resids[key] for key in alchem_comps["stateA"]])
# get the alchemicical atom ids
atom_ids = []
for r in omm_top.residues():
if r.index in residxs:
atom_ids.extend([at.index for at in r.atoms()])
return atom_ids
def _pre_equilibrate(
    self,
    system: openmm.System,
    topology: openmm.app.Topology,
    positions: ommunit.Quantity,
    settings: dict[str, SettingsBaseModel],
    dry: bool,
) -> tuple[ommunit.Quantity, ommunit.Quantity]:
    """
    Equilibrate the non-alchemical system with plain MD.

    Parameters
    ----------
    system : openmm.System
        The OpenMM System to equilibrate.
    topology : openmm.app.Topology
        OpenMM Topology of the System.
    positions : openmm.unit.Quantity
        Initial positions for the system.
    settings : dict[str, SettingsBaseModel]
        A dictionary of settings objects. Expects the
        following entries:

        * `forcefield_settings`
        * `engine_settings`
        * `thermo_settings`
        * `integrator_settings`
        * `equil_simulation_settings`
        * `equil_output_settings`
    dry : bool
        Whether or not this is a dry run. On a dry run the simulation
        objects are still created, but no MD is run.

    Returns
    -------
    equilibrated_positions : openmm.unit.Quantity
        Positions of the final simulation state, or the unchanged input
        positions on a dry run.
    box : openmm.unit.Quantity
        Box vectors of the final simulation state, or the default
        periodic box vectors of ``system`` on a dry run.
    """
    # A "nocutoff" nonbonded method marks a vacuum simulation, for which
    # the CPU count gets restricted
    is_vacuum = settings["forcefield_settings"].nonbonded_method.lower() == "nocutoff"

    engine_settings = settings["engine_settings"]
    platform = omm_compute.get_openmm_platform(
        platform_name=engine_settings.compute_platform,
        gpu_device_index=engine_settings.gpu_device_index,
        restrict_cpu_count=is_vacuum,
    )

    thermo_settings = settings["thermo_settings"]
    integrator_settings = settings["integrator_settings"]
    integrator = openmm.LangevinMiddleIntegrator(
        to_openmm(thermo_settings.temperature),
        to_openmm(integrator_settings.langevin_collision_rate),
        to_openmm(integrator_settings.timestep),
    )

    simulation = openmm.app.Simulation(
        topology=topology,
        system=system,
        integrator=integrator,
        platform=platform,
    )

    equil_settings = settings["equil_simulation_settings"]

    def _n_steps(length):
        # Convert a simulation length to a number of integration steps
        return settings_validation.get_simsteps(
            sim_length=length,
            timestep=integrator_settings.timestep,
            mc_steps=1,
        )

    # The NVT stage is optional, the NPT equilibration & production are not
    nvt_length = equil_settings.equilibration_length_nvt
    equil_steps_nvt = None if nvt_length is None else _n_steps(nvt_length)
    equil_steps_npt = _n_steps(equil_settings.equilibration_length)
    prod_steps_npt = _n_steps(equil_settings.production_length)

    if self.verbose:
        self.logger.info("running non-alchemical equilibration MD")

    # Nothing gets simulated on a dry run: hand back the inputs
    if dry:
        default_box = system.getDefaultPeriodicBoxVectors()
        return positions, to_openmm(from_openmm(default_box))

    # Borrow _run_MD from the PlainMDSimulationUnit; it is expected
    # to modify the simulation in place
    PlainMDSimulationUnit._run_MD(
        simulation=simulation,
        positions=positions,
        simulation_settings=equil_settings,
        output_settings=settings["equil_output_settings"],
        temperature=thermo_settings.temperature,
        barostat_frequency=integrator_settings.barostat_frequency,
        timestep=integrator_settings.timestep,
        equil_steps_nvt=equil_steps_nvt,
        equil_steps_npt=equil_steps_npt,
        prod_steps=prod_steps_npt,
        verbose=self.verbose,
        shared_basepath=self.shared_basepath,
    )

    # TODO: if we still see crashes, see if using enforcePeriodicBox is necessary
    # on newer tests, these were not necessary.
    final_state = simulation.context.getState(getPositions=True)
    equilibrated_positions = final_state.getPositions(asNumpy=True)
    final_box = final_state.getPeriodicBoxVectors()

    # cautiously drop the context & integrator now that we are done
    del simulation.context, integrator

    return equilibrated_positions, to_openmm(from_openmm(final_box))
@staticmethod
def _assign_partial_charges(
    partial_charge_settings: OpenFFPartialChargeSettings,
    small_mols: dict[SmallMoleculeComponent, OFFMolecule],
) -> None:
    """
    Assign partial charges to every OpenFF Molecule of the transformation.

    Each molecule is passed to
    ``charge_generation.assign_offmol_partial_charges`` with
    ``overwrite=False``.

    Parameters
    ----------
    partial_charge_settings : OpenFFPartialChargeSettings
        Settings for controlling how the partial charges are assigned.
    small_mols : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
        Dictionary of OpenFF Molecules to charge, keyed by their
        associated SmallMoleculeComponent. Only the values are used.
    """
    assign_charges = charge_generation.assign_offmol_partial_charges

    for offmol in small_mols.values():
        assign_charges(
            offmol=offmol,
            overwrite=False,
            method=partial_charge_settings.partial_charge_method,
            toolkit_backend=partial_charge_settings.off_toolkit_backend,
            generate_n_conformers=partial_charge_settings.number_of_conformers,
            nagl_model=partial_charge_settings.nagl_model,
        )
@staticmethod
def _get_system_generator(
    settings: dict[str, SettingsBaseModel],
    solvent_component: BaseSolventComponent | None,
    openff_molecules: list[OFFMolecule] | None,
    ffcache: pathlib.Path | None,
) -> SystemGenerator:
    """
    Get a system generator through the system creation
    utilities

    Parameters
    ----------
    settings : dict[str, SettingsBaseModel]
        A dictionary of settings object for the unit. The
        ``forcefield_settings``, ``integrator_settings`` and
        ``thermo_settings`` entries are used.
    solvent_component : BaseSolventComponent | None
        The solvent component of this system, if there is one.
    openff_molecules : list[openff.toolkit.Molecule] | None
        A list of OpenFF Molecules to generate templates for, if any.
    ffcache : pathlib.Path | None
        Path to the force field parameter cache.

    Returns
    -------
    system_generator : openmmforcefields.generator.SystemGenerator
        System Generator to parameterise this unit.
    """
    system_generator = system_creation.get_system_generator(
        forcefield_settings=settings["forcefield_settings"],
        integrator_settings=settings["integrator_settings"],
        thermo_settings=settings["thermo_settings"],
        cache=ffcache,
        has_solvent=solvent_component is not None,
    )

    # Handle openff Molecule templates
    # TODO: revisit this once the SystemGenerator update happens
    if openff_molecules is None:
        return system_generator

    # Register all the templates, pass unique molecules to avoid clashes.
    # dict.fromkeys drops duplicates while keeping the first-seen order;
    # going through a set would hand the molecules over in an arbitrary
    # order instead.
    unique_molecules = list(dict.fromkeys(openff_molecules))
    system_generator.add_molecules(unique_molecules)

    return system_generator
@staticmethod
def _get_modeller(
    protein_component: ProteinComponent | None,
    solvent_component: BaseSolventComponent | None,
    small_mols: dict[SmallMoleculeComponent, OFFMolecule],
    system_generator: SystemGenerator,
    solvation_settings: BaseSolvationSettings,
) -> tuple[app.Modeller, dict[Component, npt.NDArray]]:
    """
    Build the OpenMM Modeller of the system through
    ``system_creation.get_omm_modeller``.

    Parameters
    ----------
    protein_component : ProteinComponent | None
        Protein Component, if it exists.
    solvent_component : BaseSolventComponent | None
        The solvent component, if it exists.
    small_mols : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
        Dictionary of OpenFF Molecules to add, keyed by
        SmallMoleculeComponent.
    system_generator : openmmforcefields.generator.SystemGenerator
        System Generator to parameterise this unit. Only its
        ``forcefield`` attribute is used here.
    solvation_settings : BaseSolvationSettings
        Settings detailing how to solvate the system.

    Returns
    -------
    system_modeller : app.Modeller
        OpenMM Modeller object generated from ProteinComponent and
        OpenFF Molecules.
    comp_resids : dict[Component, npt.NDArray]
        Dictionary of residue indices for each component in system.
    """
    # The modeller comes back together with the residue indices
    # belonging to each component
    modeller, resids_by_component = system_creation.get_omm_modeller(
        protein_comp=protein_component,
        solvent_comp=solvent_component,
        small_mols=small_mols,
        omm_forcefield=system_generator.forcefield,
        solvent_settings=solvation_settings,
    )

    return modeller, resids_by_component
def _get_omm_objects(
    self,
    settings: dict[str, SettingsBaseModel],
    protein_component: ProteinComponent | None,
    solvent_component: BaseSolventComponent | None,
    small_mols: dict[SmallMoleculeComponent, OFFMolecule],
) -> tuple[
    app.Topology,
    openmm.System,
    openmm.unit.Quantity,
    dict[Component, npt.NDArray],
]:
    """
    Get the OpenMM Topology, Positions and System of the
    parameterised system.

    Parameters
    ----------
    settings : dict[str, SettingsBaseModel]
        Protocol settings
    protein_component : ProteinComponent | None
        Protein component for the system.
    solvent_component : BaseSolventComponent | None
        Solvent component for the system, if it exists.
    small_mols : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
        Dictionary of SmallMoleculeComponents and OpenFF Molecules
        defining the ligands to be added to the system

    Returns
    -------
    topology : app.Topology
        OpenMM Topology object describing the parameterized system.
    system : openmm.System
        A non-alchemical OpenMM System of the simulated system.
    positions : openmm.unit.Quantity
        Positions of the system.
    comp_resids : dict[Component, npt.NDArray]
        A dictionary of the residues for each component in the System.
    """
    if self.verbose:
        self.logger.info("Parameterizing system")

    # `_get_system_generator` accepts ``ffcache=None``, so an unset cache
    # name is forwarded as None. Joining the shared base path with None
    # would raise a TypeError instead.
    # NOTE(review): assumes ``forcefield_cache`` may be None - confirm
    # against the output settings definition.
    cache_name = settings["output_settings"].forcefield_cache
    ffcache = None if cache_name is None else self.shared_basepath / cache_name

    offmols = list(small_mols.values())

    # All system generator linked operations are run with the OEChem
    # backend blocked out
    with without_oechem_backend():
        system_generator = self._get_system_generator(
            settings=settings,
            solvent_component=solvent_component,
            openff_molecules=offmols,
            ffcache=ffcache,
        )

        modeller, comp_resids = self._get_modeller(
            protein_component=protein_component,
            solvent_component=solvent_component,
            small_mols=small_mols,
            system_generator=system_generator,
            solvation_settings=settings["solvation_settings"],
        )

        system = system_generator.create_system(
            topology=modeller.topology,
            molecules=list(small_mols.values()),
        )

    topology = modeller.getTopology()
    # roundtrip positions to remove vec3 issues
    positions = to_openmm(from_openmm(modeller.getPositions()))

    return topology, system, positions, comp_resids
def _add_restraints(
    self,
    system: openmm.System,
    topology: app.Topology,
    positions: openmm.unit.Quantity,
    alchem_comps: dict[str, list[Component]],
    comp_resids: dict[Component, npt.NDArray],
    settings: dict[str, SettingsBaseModel],
) -> tuple[
    Quantity | None,
    openmm.System | None,
    geometry.BaseRestraintGeometry | None,
]:
    """
    Placeholder method to add restraints if necessary

    This base implementation applies no restraint: every argument other
    than ``system`` is ignored.

    Parameters
    ----------
    system : openmm.System
        The System restraints would be added to.
    topology : app.Topology
        Topology of the System.
    positions : openmm.unit.Quantity
        Positions of the System.
    alchem_comps : dict[str, list[Component]]
        A dictionary of alchemical components for each end state.
    comp_resids : dict[Component, npt.NDArray]
        A dictionary of residues for each component in the System.
    settings : dict[str, SettingsBaseModel]
        A dictionary of settings objects for the unit.

    Returns
    -------
    standard_state_correction : None
        Always ``None`` here, since no restraint is applied.
    system : openmm.System
        The input ``system``, returned unchanged.
    restraint_geometry : None
        Always ``None`` here, since no restraint is applied.
    """
    return None, system, None
def _get_alchemical_system(
    self,
    topology: app.Topology,
    system: openmm.System,
    comp_resids: dict[Component, npt.NDArray],
    alchem_comps: dict[str, list[Component]],
    alchemical_settings: AlchemicalSettings,
) -> tuple[AbsoluteAlchemicalFactory, openmm.System, list[int]]:
    """
    Build the alchemically modified system together with its factory.

    Parameters
    ----------
    topology : openmm.Topology
        Topology of OpenMM System.
    system : openmm.System
        System to alchemically modify.
    comp_resids : dict[Component, npt.NDArray]
        A dictionary of residues for each component in the System.
    alchem_comps : dict[str, list[Component]]
        A dictionary of alchemical components for each end state.
    alchemical_settings : AlchemicalSettings
        Settings controlling how the alchemical system is built.

    Returns
    -------
    alchemical_factory : AbsoluteAlchemicalFactory
        Factory for creating an alchemically modified system.
    alchemical_system : openmm.System
        Alchemically modified system
    alchemical_indices : list[int]
        A list of atom indices for the alchemically modified
        species in the system.

    TODO
    ----
    * Add support for all alchemical factory options
    """
    atom_indices = self._get_alchemical_indices(topology, comp_resids, alchem_comps)

    # Region options: soft-core alpha/a/b/c and the sterics annihilation
    # flag come from the settings, the rest is fixed here
    region = AlchemicalRegion(
        alchemical_atoms=atom_indices,
        softcore_alpha=alchemical_settings.softcore_alpha,
        annihilate_electrostatics=True,
        annihilate_sterics=alchemical_settings.annihilate_sterics,
        softcore_a=alchemical_settings.softcore_a,
        softcore_b=alchemical_settings.softcore_b,
        softcore_c=alchemical_settings.softcore_c,
        softcore_beta=0.0,
        softcore_d=1.0,
        softcore_e=1.0,
        softcore_f=2.0,
    )

    # Factory options: only the dispersion correction flag is taken
    # from the settings
    disable_dispersion = alchemical_settings.disable_alchemical_dispersion_correction
    factory = AbsoluteAlchemicalFactory(
        consistent_exceptions=False,
        switch_width=1.0 * ommunit.angstroms,
        alchemical_pme_treatment="exact",
        alchemical_rf_treatment="switched",
        disable_alchemical_dispersion_correction=disable_dispersion,
        split_alchemical_forces=True,
    )

    modified_system = factory.create_alchemical_system(system, region)

    return factory, modified_system, atom_indices
@staticmethod
def _subsample_topology(
    topology: openmm.app.Topology,
    positions: openmm.unit.Quantity,
    output_selection: str,
    output_file: pathlib.Path,
) -> npt.NDArray:
    """
    Select a subset of the system and write it out as a PDB file.

    Parameters
    ----------
    topology : openmm.app.Topology
        The system topology to subsample.
    positions : openmm.unit.Quantity
        The system positions.
    output_selection : str
        An MDTraj selection string to subsample the topology with.
    output_file : pathlib.Path
        Path to the file to write the PDB to. Nothing is written if
        the selection is empty.

    Returns
    -------
    selection_indices : npt.NDArray
        The indices of the subselected system.
    """
    full_traj = mdtraj_from_openmm(topology, positions)
    selected = full_traj.topology.select(output_selection)

    # An empty selection leaves nothing to write out
    if len(selected) == 0:
        return selected

    full_traj.atom_slice(selected).save_pdb(output_file)
    return selected
def run(
    self,
    dry: bool = False,
    verbose: bool = True,
    scratch_basepath: pathlib.Path | None = None,
    shared_basepath: pathlib.Path | None = None,
) -> dict[str, Any]:
    """Run the setup phase of an absolute free energy calculation.

    The OpenMM objects are created, pre-equilibrated, optionally
    restrained and turned into an alchemical system. The alchemical
    system and the positions are then serialized to the shared directory.

    Parameters
    ----------
    dry : bool
      Forwarded to the pre-equilibration step. When ``True`` the
      in-memory debug objects (systems, factory, positions, ...) are
      also added to the returned dictionary. Default False.
    verbose : bool
      Verbose output of the setup progress. Output is provided via
      INFO level logging, default True.
    scratch_basepath : pathlib.Path | None
      Path to the scratch (temporary) directory space. Defaults to the
      current working directory if ``None``.
    shared_basepath : pathlib.Path | None
      Path to the shared (persistent) directory space. Defaults to the
      current working directory if ``None``.

    Returns
    -------
    dict
      Paths to the serialized system, positions and (if any atoms were
      selected) the PDB structure, alongside the selection indices, box
      vectors, standard state correction and restraint geometry. The
      debug objects are included too if ``dry==True``.
    """
    # Set up paths, logging and verbosity
    self._prepare(verbose, scratch_basepath, shared_basepath)

    if self.verbose:
        self.logger.info("Starting system setup unit")

    # Gather the chemical components and the protocol settings
    alchem_comps, solv_comp, prot_comp, small_mols = self._get_components()
    settings = self._get_settings()

    # Charges are assigned up front so that no later step can disagree on them
    self._assign_partial_charges(settings["charge_settings"], small_mols)

    # Build the OpenMM topology, system, positions and component residue ids
    omm_topology, omm_system, positions, comp_resids = self._get_omm_objects(
        settings=settings,
        protein_component=prot_comp,
        solvent_component=solv_comp,
        small_mols=small_mols,
    )

    # Pre-equilibrate the system (tests it, avoids NaNs, gives a stable start)
    positions, box_vectors = self._pre_equilibrate(
        omm_system, omm_topology, positions, settings, dry
    )

    # Apply restraints.
    # Note: when no restraint is applied, restrained_omm_system == omm_system
    standard_state_corr, restrained_omm_system, restraint_geometry = self._add_restraints(
        omm_system,
        omm_topology,
        positions,
        alchem_comps,
        comp_resids,
        settings,
    )

    # Create the alchemical system from the (possibly restrained) system
    alchem_factory, alchem_system, alchem_indices = self._get_alchemical_system(
        topology=omm_topology,
        system=restrained_omm_system,
        comp_resids=comp_resids,
        alchem_comps=alchem_comps,
        alchemical_settings=settings["alchemical_settings"],
    )

    # Subselect the system based on the user's selection & write the initial PDB
    output_settings = settings["output_settings"]
    structure_path = self.shared_basepath / output_settings.output_structure
    selection_indices = self._subsample_topology(
        topology=omm_topology,
        positions=positions,
        output_selection=output_settings.output_indices,
        output_file=structure_path,
    )

    # Serialize the alchemical system
    system_outfile = self.shared_basepath / "alchemical_system.xml.bz2"
    serialize(alchem_system, system_outfile)

    # Store the positions as a plain array of nanometer values
    positions_outfile = self.shared_basepath / "system_positions.npy"
    np.save(positions_outfile, from_openmm(positions).to("nanometer").m)

    unit_results_dict = {
        "system": system_outfile,
        "positions": positions_outfile,
        # The PDB is only written when the selection matched something
        "pdb_structure": structure_path if len(selection_indices) > 0 else None,
        "selection_indices": selection_indices,
        "box_vectors": from_openmm(box_vectors),
    }

    # Without a restraint there is no correction, so report zero
    unit_results_dict["standard_state_correction"] = (
        0 * offunit.kilocalorie_per_mole
        if standard_state_corr is None
        else standard_state_corr.to("kilocalorie_per_mole")
    )

    unit_results_dict["restraint_geometry"] = (
        None if restraint_geometry is None else restraint_geometry.model_dump()
    )

    if dry:
        # Expose the in-memory objects so they can be inspected
        unit_results_dict.update(
            {
                "standard_system": omm_system,
                "restrained_system": restrained_omm_system,
                "alchem_system": alchem_system,
                "alchem_indices": alchem_indices,
                "alchem_factory": alchem_factory,
                "debug_positions": positions,
            }
        )

    return unit_results_dict
def _execute(
    self,
    ctx: gufe.Context,
    **inputs,
) -> dict[str, Any]:
    """
    Execute the setup unit within a gufe execution context.

    Parameters
    ----------
    ctx : gufe.Context
      Execution context providing the scratch and shared directories.
    **inputs
      Additional unit inputs; they are not used directly here.

    Returns
    -------
    dict[str, Any]
      The unit identifiers and software versions, merged with
      everything returned by ``run``.
    """
    log_system_probe(logging.INFO, paths=[ctx.scratch])

    run_outputs = self.run(scratch_basepath=ctx.scratch, shared_basepath=ctx.shared)

    # The versions are recorded alongside the results of this unit
    provenance = {
        "repeat_id": self._inputs["repeat_id"],
        "generation": self._inputs["generation"],
        "simtype": self.simtype,
        "openmm_version": openmm.__version__,
        "openfe_version": openfe.__version__,
        "gufe_version": gufe.__version__,
    }

    return provenance | run_outputs
class BaseAbsoluteMultiStateSimulationUnit(gufe.ProtocolUnit, AbsoluteUnitMixin):
@staticmethod
def _check_restart(output_settings: SettingsBaseModel, shared_path: pathlib.Path):
    """
    Work out whether this execution is a restart of an earlier one.

    Parameters
    ----------
    output_settings : SettingsBaseModel
      The simulation output settings, providing the trajectory
      (``output_filename``) and checkpoint
      (``checkpoint_storage_filename``) file names.
    shared_path : pathlib.Path
      The shared directory in which to look for existing files.

    Returns
    -------
    bool
      ``True`` if both the trajectory and checkpoint files exist,
      ``False`` if neither of them does.

    Raises
    ------
    IOError
      If one of the trajectory or checkpoint files is present
      without the other.

    Notes
    -----
    For now this just checks if the netcdf files are present in the
    shared directory but in the future this may expand depending on
    how warehouse works.
    """
    has_trajectory = (shared_path / output_settings.output_filename).is_file()
    has_checkpoint = (shared_path / output_settings.checkpoint_storage_filename).is_file()

    # Both present -> restart, both absent -> fresh start
    if has_trajectory == has_checkpoint:
        return has_trajectory

    # Exactly one of the two files exists: the stored state is inconsistent
    if has_trajectory:
        detail = "the trajectory file is present but not the checkpoint file. "
    else:
        detail = "the checkpoint file is present but not the trajectory file. "

    raise IOError(
        "Attempting to restart but "
        + detail
        + "This should not happen under normal circumstances."
    )
@abc.abstractmethod
def _get_components(
self,
) -> tuple[
dict[str, list[Component]],
gufe.SolventComponent | None,
gufe.ProteinComponent | None,
dict[SmallMoleculeComponent, OFFMolecule],
]:
"""
Get the relevant components to create the alchemical system with.
Returns
-------
alchem_comps : dict[str, list[Component]]
The alchemical components, as lists of components stored under
string keys.
solv_comp : gufe.SolventComponent | None
The solvent component, or ``None`` if there is none.
prot_comp : gufe.ProteinComponent | None
The protein component, or ``None`` if there is none.
small_mols : dict[SmallMoleculeComponent, OFFMolecule]
The small molecule components mapped to their OpenFF Molecule.
Note
----
Must be implemented in the child class.
"""
# Abstract stub: the body is intentionally empty.
...
def _get_lambda_schedule(
    self, settings: dict[str, SettingsBaseModel]
) -> dict[str, list[float]]:
    """
    Build the lambda schedule in the convention used by the
    AbsoluteAlchemicalFactory.

    Parameters
    ----------
    settings : dict[str, SettingsBaseModel]
      Settings for the unit. Only the ``lambda_settings`` entry is read.

    Returns
    -------
    lambdas : dict[str, list[float]]
      The per-window values, stored under the keys
      ``lambda_electrostatics``, ``lambda_sterics`` and
      ``lambda_restraints``.

    TODO
    ----
    * Augment this by using something akin to the RFE protocol's
      LambdaProtocol
    """
    lambda_settings = settings["lambda_settings"]

    # The electrostatics and vdw schedules are reversed, since in
    # AbsoluteAlchemicalFactory 1 means fully interacting (which
    # would be non-interacting for us). Restraints are passed as-is.
    return {
        "lambda_electrostatics": [1 - value for value in lambda_settings.lambda_elec],
        "lambda_sterics": [1 - value for value in lambda_settings.lambda_vdw],
        "lambda_restraints": list(lambda_settings.lambda_restraints),
    }
def _get_states(
    self,
    alchemical_system: openmm.System,
    positions: openmm.unit.Quantity,
    box_vectors: openmm.unit.Quantity,
    thermodynamic_settings: ThermoSettings,
    lambdas: dict[str, list[float]],
    solvent_component: BaseSolventComponent | None,
    alchemically_restrained: bool,
) -> tuple[list[SamplerState], list[ThermodynamicState]]:
    """
    Create the sampler and thermodynamic states for an alchemical system.

    Parameters
    ----------
    alchemical_system : openmm.System
      Alchemical system to get states for.
    positions : openmm.unit.Quantity
      Positions of the alchemical system.
    box_vectors : openmm.unit.Quantity
      Box vectors of the alchemical system. Only used if the system
      uses periodic boundary conditions.
    thermodynamic_settings : ThermoSettings
      Settings providing the temperature and pressure.
    lambdas : dict[str, list[float]]
      A dictionary of lambda scales. It is copied, not modified.
    solvent_component : BaseSolventComponent | None
      The solvent component of the system, if there is one. The
      pressure is only set when this is not ``None``.
    alchemically_restrained : bool
      Whether or not the system requires a control parameter
      for any alchemical restraints.

    Returns
    -------
    sampler_states : list[SamplerState]
      One entry per thermodynamic state. Every entry is the same
      SamplerState object.
    cmp_states : list[ThermodynamicState]
      The thermodynamic states created from the lambda protocol.
    """
    # The alchemical state is read from the alchemical system itself
    alchemical_state = AlchemicalState.from_system(alchemical_system)

    # Thermodynamic constants shared by every state
    temperature = thermodynamic_settings.temperature
    pressure = thermodynamic_settings.pressure

    constants = {"temperature": ensure_quantity(temperature, "openmm")}
    if solvent_component is not None:
        constants["pressure"] = ensure_quantity(pressure, "openmm")

    # Work on a copy so that the caller's schedule is left untouched
    protocol = copy.deepcopy(lambdas)

    composable_states = [alchemical_state]
    if alchemically_restrained:
        composable_states.append(
            omm_restraints.RestraintParameterState(lambda_restraints=1.0)
        )
    else:
        # No restraint is being controlled here, so its
        # schedule is dropped from the protocol
        protocol.pop("lambda_restraints", None)

    cmp_states = create_thermodynamic_state_protocol(
        alchemical_system,
        protocol=protocol,
        constants=constants,
        composable_states=composable_states,
    )

    shared_sampler_state = SamplerState(positions=positions)
    if alchemical_system.usesPeriodicBoundaryConditions():
        shared_sampler_state.box_vectors = box_vectors

    # The same SamplerState object is handed out once per thermodynamic state
    sampler_states = [shared_sampler_state] * len(cmp_states)

    return sampler_states, cmp_states
@staticmethod
def _get_integrator(
    integrator_settings: IntegratorSettings,
    simulation_settings: MultiStateSimulationSettings,
    system: openmm.System,
) -> openmmtools.mcmc.LangevinDynamicsMove:
    """
    Build the LangevinDynamicsMove used to propagate the simulation.

    Parameters
    ----------
    integrator_settings : IntegratorSettings
      Settings controlling the Langevin integrator.
    simulation_settings : MultiStateSimulationSettings
      Settings controlling the simulation.
    system : openmm.System
      The OpenMM System, inspected for virtual sites.

    Returns
    -------
    integrator : openmmtools.mcmc.LangevinDynamicsMove
      A configured integrator object.

    Raises
    ------
    ValueError
      If there are virtual sites in the system, but
      velocities are not being reassigned after every MCMC move.
    """
    n_steps = settings_validation.convert_steps_per_iteration(
        simulation_settings, integrator_settings
    )

    integrator = openmmtools.mcmc.LangevinDynamicsMove(
        timestep=to_openmm(integrator_settings.timestep),
        collision_rate=to_openmm(integrator_settings.langevin_collision_rate),
        n_steps=n_steps,
        reassign_velocities=integrator_settings.reassign_velocities,
        n_restart_attempts=integrator_settings.n_restart_attempts,
        constraint_tolerance=integrator_settings.constraint_tolerance,
    )

    # Validate for a known issue when dealing with virtual sites
    # and multistate simulations
    if not integrator_settings.reassign_velocities:
        has_virtual_sites = any(
            system.isVirtualSite(index) for index in range(system.getNumParticles())
        )
        if has_virtual_sites:
            raise ValueError(
                "Simulations with virtual sites without velocity "
                "reassignments are unstable with MCMC integrators. "
                "You can set `reassign_velocities` to ``True`` in the "
                "`integrator_settings` to avoid this issue."
            )

    return integrator
@staticmethod
def _get_reporter(
    storage_path: pathlib.Path,
    selection_indices: npt.NDArray,
    simulation_settings: MultiStateSimulationSettings,
    output_settings: MultiStateOutputSettings,
) -> multistate.MultiStateReporter:
    """
    Create the MultiStateReporter for the simulation.

    Parameters
    ----------
    storage_path : pathlib.Path
      Path to the directory where files should be written.
    selection_indices : npt.NDArray
      Array of system particle indices to subsample the system by.
    simulation_settings : MultiStateSimulationSettings
      Multistate simulation control settings, specifically containing
      the amount of time per state sampling iteration.
    output_settings : MultiStateOutputSettings
      Output settings for the simulations.

    Returns
    -------
    reporter : multistate.MultiStateReporter
      The reporter for the simulation.

    Notes
    -----
    All this does is create the reporter, it works for both
    new reporters and if we are doing a restart.
    """

    def _frequency_to_interval(frequency, setting_label):
        # A frequency of None is turned into an interval of 0
        if frequency is None:
            return 0
        return settings_validation.divmod_time_and_check(
            numerator=frequency,
            denominator=simulation_settings.time_per_iteration,
            numerator_name=setting_label,
            denominator_name="simulation settings' time_per_iteration",
        )

    trajectory_file = storage_path / output_settings.output_filename

    # openmmtools takes the checkpoint file as relative to the location
    # of the trajectory file, so only the file name is passed along
    checkpoint_name = output_settings.checkpoint_storage_filename

    position_interval = _frequency_to_interval(
        output_settings.positions_write_frequency,
        "output settings' position_write_frequency",
    )
    velocity_interval = _frequency_to_interval(
        output_settings.velocities_write_frequency,
        "output settings' velocity_write_frequency",
    )

    checkpoint_interval = settings_validation.convert_checkpoint_interval_to_iterations(
        checkpoint_interval=output_settings.checkpoint_interval,
        time_per_iteration=simulation_settings.time_per_iteration,
    )

    return multistate.MultiStateReporter(
        storage=trajectory_file,
        analysis_particle_indices=selection_indices,
        checkpoint_interval=checkpoint_interval,
        checkpoint_storage=checkpoint_name,
        position_interval=position_interval,
        velocity_interval=velocity_interval,
    )
@staticmethod
def _get_sampler(
    integrator: openmmtools.mcmc.LangevinDynamicsMove,
    reporter: openmmtools.multistate.MultiStateReporter,
    simulation_settings: MultiStateSimulationSettings,
    thermodynamic_settings: ThermoSettings,
    compound_states: list[ThermodynamicState],
    sampler_states: list[SamplerState],
    platform: openmm.Platform,
    restart: bool,
) -> multistate.MultiStateSampler:
    """
    Get a sampler based on the equilibrium sampling method requested.

    A new sampler is created from the inputs, unless ``restart`` is
    ``True`` in which case it is rebuilt from the reporter's storage
    and checked against the current protocol inputs.

    Parameters
    ----------
    integrator : openmmtools.mcmc.LangevinDynamicsMove
      The simulation integrator.
    reporter : openmmtools.multistate.MultiStateReporter
      The reporter to hook up to the sampler.
    simulation_settings : MultiStateSimulationSettings
      Settings for the alchemical sampler.
    thermodynamic_settings : ThermoSettings
      Thermodynamic settings
    compound_states : list[ThermodynamicState]
      A list of thermodynamic states to sample.
    sampler_states : list[SamplerState]
      A list of sampler states.
    platform : openmm.Platform
      The compute platform to use.
    restart : bool
      ``True`` if we are doing a simulation restart.

    Returns
    -------
    sampler : multistate.MultistateSampler
      A sampler configured for the chosen sampling method.

    Raises
    ------
    AttributeError
      If the requested sampler method is not one of ``repex``,
      ``sams`` or ``independent``.
    ValueError
      On restart, if the stored simulation does not match the
      current protocol (number of states or replicas, alchemical
      parameters, or integrator steps and timestep).
    """
    _SAMPLERS = {
        "repex": multistate.ReplicaExchangeSampler,
        "sams": multistate.SAMSSampler,
        "independent": multistate.MultiStateSampler,
    }

    sampler_method = simulation_settings.sampler_method.lower()

    try:
        sampler_class = _SAMPLERS[sampler_method]
    except KeyError:
        errmsg = f"Unknown sampler {sampler_method}"
        raise AttributeError(errmsg)

    # Get the real time analysis values to use
    rta_its, rta_min_its = settings_validation.convert_real_time_analysis_iterations(
        simulation_settings=simulation_settings,
    )

    # Get the number of production iterations to run for
    steps_per_iteration = integrator.n_steps
    timestep = from_openmm(integrator.timestep)
    number_of_iterations = int(
        settings_validation.get_simsteps(
            sim_length=simulation_settings.production_length,
            timestep=timestep,
            mc_steps=steps_per_iteration,
        )
        / steps_per_iteration
    )

    # convert early_termination_target_error from kcal/mol to kT
    early_termination_target_error = (
        settings_validation.convert_target_error_from_kcal_per_mole_to_kT(
            thermodynamic_settings.temperature,
            simulation_settings.early_termination_target_error,
        )
    )

    sampler_kwargs = {
        "mcmc_moves": integrator,
        "online_analysis_interval": rta_its,
        "online_analysis_target_error": early_termination_target_error,
        "online_analysis_minimum_iterations": rta_min_its,
        "number_of_iterations": number_of_iterations,
    }

    if sampler_method == "sams":
        sampler_kwargs |= {
            "flatness_criteria": simulation_settings.sams_flatness_criteria,
            "gamma0": simulation_settings.sams_gamma0,
        }

    if sampler_method == "repex":
        sampler_kwargs |= {
            "replica_mixing_scheme": "swap-all",
        }

    # Restarting so we just rebuild from storage.
    if restart:
        sampler = sampler_class.from_storage(reporter)

        # The per-state checks below index ``compound_states`` by the
        # position of the stored state, so the number of states has to
        # agree first. Without this guard, a checkpoint holding more
        # states than the current protocol fails with an IndexError
        # instead of the intended ValueError.
        stored_states = sampler._thermodynamic_states
        if len(stored_states) != len(compound_states):
            errmsg = "System in checkpoint does not match protocol system, cannot resume"
            raise ValueError(errmsg)

        # We do some checks to make sure we are running the same system
        # including ensuring that we have the same thermodynamic parameters and
        # that the lambda schedule is the same.
        for index, thermostate in enumerate(stored_states):
            system_validation.assert_multistate_system_equality(
                ref_system=compound_states[index].get_system(remove_thermostat=True),
                stored_system=thermostate.get_system(remove_thermostat=True),
            )

            # Loop over each composable state (e.g. GlobalParameterState object)
            # get the parameters and check that the values are the same.
            for composable_state in compound_states[index]._composable_states:
                for param in composable_state._parameters:
                    expected = getattr(compound_states[index], param)
                    stored = getattr(thermostate, param)
                    if expected != stored:
                        errmsg = (
                            f"System parameter {param} in checkpoint does "
                            "not match protocol system, cannot resume"
                        )
                        raise ValueError(errmsg)

        if (
            (simulation_settings.n_replicas != sampler.n_states)
            or (simulation_settings.n_replicas != sampler.n_replicas)
            or (sampler.mcmc_moves[0].n_steps != steps_per_iteration)
            or (sampler.mcmc_moves[0].timestep != integrator.timestep)
        ):
            errmsg = "System in checkpoint does not match protocol system, cannot resume"
            raise ValueError(errmsg)
    else:
        sampler = sampler_class(**sampler_kwargs)

        sampler.create(
            thermodynamic_states=compound_states,
            sampler_states=sampler_states,
            storage=reporter,
        )

    # Get and set the context caches
    sampler.energy_context_cache = openmmtools.cache.ContextCache(
        capacity=None,
        time_to_live=None,
        platform=platform,
    )
    sampler.sampler_context_cache = openmmtools.cache.ContextCache(
        capacity=None,
        time_to_live=None,
        platform=platform,
    )

    return sampler
def _run_simulation(
    self,
    sampler: multistate.MultiStateSampler,
    reporter: multistate.MultiStateReporter,
    settings: dict[str, SettingsBaseModel],
    dry: bool,
):
    """
    Minimize, equilibrate and run production with the sampler, or
    clean up the reporter files if this is a dry run.

    Parameters
    ----------
    sampler : multistate.MultiStateSampler
      The sampler associated with the simulation to run.
    reporter : multistate.MultiStateReporter
      The reporter associated with the sampler.
    settings : dict[str, SettingsBaseModel]
      The dictionary of settings for the protocol.
    dry : bool
      Whether or not to dry run the simulation. In a dry run nothing
      is simulated; the reporter is closed and its trajectory and
      checkpoint files are deleted.
    """
    simulation_settings = settings["simulation_settings"]
    integrator_settings = settings["integrator_settings"]

    # Work out how many integration steps each phase corresponds to
    mc_steps = settings_validation.convert_steps_per_iteration(
        simulation_settings=simulation_settings,
        integrator_settings=integrator_settings,
    )
    equil_steps = settings_validation.get_simsteps(
        sim_length=simulation_settings.equilibration_length,
        timestep=integrator_settings.timestep,
        mc_steps=mc_steps,
    )
    prod_steps = settings_validation.get_simsteps(
        sim_length=simulation_settings.production_length,
        timestep=integrator_settings.timestep,
        mc_steps=mc_steps,
    )

    if not dry:  # pragma: no-cover
        # Minimization and equilibration are only done if no
        # production iteration has been taken yet
        fresh_start = sampler._iteration == 0
        if fresh_start:
            if self.verbose:
                self.logger.info("minimizing systems")
            sampler.minimize(max_iterations=simulation_settings.minimization_steps)

            if self.verbose:
                self.logger.info("equilibrating systems")
            sampler.equilibrate(int(equil_steps / mc_steps))

        if self.verbose:
            self.logger.info("running production phase")

        # `run` is used so that we're limited by the number of iterations
        # passed when the sampler was built. Iterations which were
        # already completed are subtracted from the total.
        remaining_iterations = int(prod_steps / mc_steps) - sampler._iteration
        sampler.run(n_iterations=remaining_iterations)

        if self.verbose:
            self.logger.info("production phase complete")
    else:
        # The reporter is closed first to prevent file handle clashes
        reporter.close()

        # Remove the files the reporter created
        output_settings = settings["output_settings"]
        leftover_files = [
            self.shared_basepath / output_settings.output_filename,
            self.shared_basepath / output_settings.checkpoint_storage_filename,
        ]
        for leftover in leftover_files:
            leftover.unlink()
def run(
    self,
    *,
    system: openmm.System,
    positions: openmm.unit.Quantity,
    box_vectors: Quantity,
    selection_indices: npt.NDArray,
    alchemical_restraints: bool,
    dry: bool = False,
    verbose: bool = True,
    scratch_basepath: pathlib.Path | None = None,
    shared_basepath: pathlib.Path | None = None,
) -> dict[str, Any]:
    """
    Run the free energy calculation using a multistate sampler.

    Parameters
    ----------
    system : openmm.System
      The System to simulate.
    positions : openmm.unit.Quantity
      The positions of the System.
    box_vectors : openff.units.Quantity
      The box vectors of the System.
    selection_indices : npt.NDArray
      Indices of the System particles to write to file.
    alchemical_restraints : bool
      Whether or not the system has alchemical restraints.
    dry : bool
      Do a dry run of the calculation, creating all the necessary
      components, but without running the simulation.
    verbose : bool
      Verbose output of the simulation progress. Output is provided at
      the INFO logging level.
    scratch_basepath : pathlib.Path | None
      Where to store temporary files, defaults to the current working
      directory if ``None``.
    shared_basepath : pathlib.Path | None
      Where to store calculation outputs, defaults to the current working
      directory if ``None``.

    Returns
    -------
    dict
      The paths to the trajectory and checkpoint files, or the
      ``sampler`` and ``integrator`` debug objects if ``dry==True``.
    """
    # Set up paths, logging and verbosity
    self._prepare(verbose, scratch_basepath, shared_basepath)

    if self.verbose:
        self.logger.info("Starting simulation unit")

    settings = self._get_settings()
    output_settings = settings["output_settings"]

    # Work out whether earlier outputs exist which we should resume from
    self.restart = self._check_restart(
        output_settings=output_settings,
        shared_path=self.shared_basepath,
    )

    alchem_comps, solv_comp, prot_comp, small_mols = self._get_components()

    lambdas = self._get_lambda_schedule(settings)

    # The compute platform, with the CPU count restricted when
    # the nonbonded method is "nocutoff"
    nonbonded_method = settings["forcefield_settings"].nonbonded_method.lower()
    platform = omm_compute.get_openmm_platform(
        platform_name=settings["engine_settings"].compute_platform,
        gpu_device_index=settings["engine_settings"].gpu_device_index,
        restrict_cpu_count=(nonbonded_method == "nocutoff"),
    )

    # The box vectors arrive as an openff Quantity and are converted to
    # vec3 before the sampler and thermodynamic states are built
    sampler_states, cmp_states = self._get_states(
        alchemical_system=system,
        positions=positions,
        box_vectors=make_vec3_box(box_vectors),
        thermodynamic_settings=settings["thermo_settings"],
        lambdas=lambdas,
        solvent_component=solv_comp,
        alchemically_restrained=alchemical_restraints,
    )

    integrator = self._get_integrator(
        integrator_settings=settings["integrator_settings"],
        simulation_settings=settings["simulation_settings"],
        system=system,
    )

    try:
        # Create, or reopen, the multistate reporter
        reporter = self._get_reporter(
            storage_path=self.shared_basepath,
            selection_indices=selection_indices,
            simulation_settings=settings["simulation_settings"],
            output_settings=output_settings,
        )

        sampler = self._get_sampler(
            integrator=integrator,
            reporter=reporter,
            simulation_settings=settings["simulation_settings"],
            thermodynamic_settings=settings["thermo_settings"],
            compound_states=cmp_states,
            sampler_states=sampler_states,
            platform=platform,
            restart=self.restart,
        )

        self._run_simulation(
            sampler=sampler,
            reporter=reporter,
            settings=settings,
            dry=dry,
        )
    finally:
        # The cleanup is itself guarded, as we may get here without
        # the reporter or the sampler having been created
        try:
            # Order is reporter, contexts, sampler, integrator
            reporter.close()  # close to prevent file handle clashes

            # clear GPU context
            # Note: use cache.empty() when openmmtools #690 is resolved
            energy_contexts = sampler.energy_context_cache._lru._data
            for context in list(energy_contexts.keys()):
                del energy_contexts[context]

            sampler_contexts = sampler.sampler_context_cache._lru._data
            for context in list(sampler_contexts.keys()):
                del sampler_contexts[context]

            # cautiously clear out the global context cache too
            global_contexts = openmmtools.cache.global_context_cache._lru._data
            for context in list(global_contexts.keys()):
                del global_contexts[context]

            del sampler.sampler_context_cache, sampler.energy_context_cache

            # Keep these around in a dry run so we can inspect things
            if not dry:
                # At this point we know the sampler exists, so we del the
                # integrator first since it's associated with the sampler
                del integrator, sampler
        except UnboundLocalError:
            pass

    if dry:
        return {
            "sampler": sampler,
            "integrator": integrator,
        }

    return {
        "trajectory": self.shared_basepath / output_settings.output_filename,
        "checkpoint": self.shared_basepath / output_settings.checkpoint_storage_filename,
    }
def _execute(
    self,
    ctx: gufe.Context,
    *,
    setup_results,
    **inputs,
) -> dict[str, Any]:
    """
    Execute the simulation unit from the outputs of the setup unit.

    Parameters
    ----------
    ctx : gufe.Context
      Execution context providing the scratch and shared directories.
    setup_results
      Result of the setup unit. Its ``outputs`` provide the serialized
      system, the positions, the selection indices, the box vectors
      and the restraint geometry.
    **inputs
      Additional unit inputs; they are not used directly here.

    Returns
    -------
    dict[str, Any]
      The unit identifiers merged with everything returned by ``run``.
    """
    log_system_probe(logging.INFO, paths=[ctx.scratch])

    # Ensure the environment hasn't changed
    self._verify_execution_environment(setup_results.outputs)

    # Rebuild the in-memory inputs from what the setup unit stored
    setup_outputs = setup_results.outputs
    system = deserialize(setup_outputs["system"])
    positions = to_openmm(np.load(setup_outputs["positions"]) * offunit.nanometer)
    selection_indices = setup_outputs["selection_indices"]
    box_vectors = setup_outputs["box_vectors"]

    # A stored restraint geometry means a restraint has to be controlled
    alchemical_restraints = setup_outputs["restraint_geometry"] is not None

    run_outputs = self.run(
        system=system,
        positions=positions,
        box_vectors=box_vectors,
        selection_indices=selection_indices,
        alchemical_restraints=alchemical_restraints,
        scratch_basepath=ctx.scratch,
        shared_basepath=ctx.shared,
    )

    identifiers = {
        "repeat_id": self._inputs["repeat_id"],
        "generation": self._inputs["generation"],
        "simtype": self.simtype,
    }

    return identifiers | run_outputs
class BaseAbsoluteMultiStateAnalysisUnit(gufe.ProtocolUnit, AbsoluteUnitMixin):
@staticmethod
def _analyze_multistate_energies(
    trajectory: pathlib.Path,
    checkpoint: pathlib.Path,
    sampler_method: str,
    output_directory: pathlib.Path,
    dry: bool,
):
    """
    Analyze multistate energies and generate plots.

    The reporter and the analyzer are always closed, including when
    the analysis or the plotting fails, so that no file handle is
    left open on the trajectory files.

    Parameters
    ----------
    trajectory : pathlib.Path
      Path to the NetCDF trajectory file.
    checkpoint : pathlib.Path
      The name of the checkpoint file. Note this is
      relative in path to the trajectory file.
    sampler_method : str
      The multistate sampler method used.
    output_directory : pathlib.Path
      The path to where plots will be written.
    dry : bool
      Whether or not we are running a dry run. Plots are only
      created when this is ``False``.

    Returns
    -------
    The ``unit_results_dict`` of the analyzer.
    """
    reporter = multistate.MultiStateReporter(
        storage=trajectory,
        # Note: openmmtools only wants the name of the checkpoint
        # file, it assumes it to be in the same place as the trajectory
        checkpoint_storage=checkpoint.name,
        open_mode="r",
    )

    try:
        analyzer = multistate_analysis.MultistateEquilFEAnalysis(
            reporter=reporter,
            sampling_method=sampler_method,
            result_units=offunit.kilocalorie_per_mole,
        )

        try:
            # Only create plots when not doing a dry run
            if not dry:
                analyzer.plot(filepath=output_directory, filename_prefix="")
        finally:
            # The analyzer is closed before the reporter it reads from
            analyzer.close()
    finally:
        reporter.close()

    return analyzer.unit_results_dict
def run(
    self,
    *,
    trajectory: pathlib.Path,
    checkpoint: pathlib.Path,
    dry: bool = False,
    verbose: bool = True,
    scratch_basepath: pathlib.Path | None = None,
    shared_basepath: pathlib.Path | None = None,
) -> dict[str, Any]:
    """Analyze the multistate simulation.

    Parameters
    ----------
    trajectory : pathlib.Path
      Path to the MultiStateReporter generated NetCDF file.
    checkpoint : pathlib.Path
      Path to the checkpoint file generated by MultiStateReporter.
    dry : bool
      Whether or not this is a dry run. It is passed on to the
      energy analysis.
    verbose : bool
      Verbose output of the analysis progress. Output is provided via
      INFO level logging.
    scratch_basepath : pathlib.Path | None
      Where to store temporary files, defaults to current working directory
    shared_basepath : pathlib.Path | None
      Where to write the analysis outputs, defaults to current
      working directory

    Returns
    -------
    dict
      The results of the energy analysis.
    """
    # Set up paths, logging and verbosity
    self._prepare(verbose, scratch_basepath, shared_basepath)

    if self.verbose:
        self.logger.info("Starting simulation analysis unit")

    settings = self._get_settings()

    if verbose:
        self.logger.info("Analyzing energies")

    sampler_method = settings["simulation_settings"].sampler_method.lower()

    return self._analyze_multistate_energies(
        trajectory=trajectory,
        checkpoint=checkpoint,
        sampler_method=sampler_method,
        output_directory=self.shared_basepath,
        dry=dry,
    )
def _execute(
    self,
    ctx: gufe.Context,
    *,
    setup_results,
    simulation_results,
    **inputs,
) -> dict[str, Any]:
    """
    Execute the analysis unit from the setup and simulation outputs.

    Parameters
    ----------
    ctx : gufe.Context
      Execution context providing the scratch and shared directories.
    setup_results
      Result of the setup unit. Its ``outputs`` provide the PDB
      structure, the selection indices, the restraint geometry and
      the standard state correction.
    simulation_results
      Result of the simulation unit. Its ``outputs`` provide the
      trajectory and checkpoint files.
    **inputs
      Additional unit inputs; they are not used directly here.

    Returns
    -------
    dict[str, Any]
      The unit identifiers and the forwarded setup and simulation
      outputs, merged with everything returned by ``run``.
    """
    log_system_probe(logging.INFO, paths=[ctx.scratch])

    # Ensure the environment hasn't changed
    self._verify_execution_environment(setup_results.outputs)

    # Collect what is needed from the upstream units before analysing
    setup_outputs = setup_results.outputs
    pdb_file = setup_outputs["pdb_structure"]
    selection_indices = setup_outputs["selection_indices"]
    restraint_geometry = setup_outputs["restraint_geometry"]
    standard_state_corr = setup_outputs["standard_state_correction"]

    simulation_outputs = simulation_results.outputs
    trajectory = simulation_outputs["trajectory"]
    checkpoint = simulation_outputs["checkpoint"]

    analysis_outputs = self.run(
        trajectory=trajectory,
        checkpoint=checkpoint,
        scratch_basepath=ctx.scratch,
        shared_basepath=ctx.shared,
    )

    # The upstream outputs are re-included to make
    # life easier when gathering results.
    forwarded = {
        "repeat_id": self._inputs["repeat_id"],
        "generation": self._inputs["generation"],
        "simtype": self.simtype,
        "pdb_structure": pdb_file,
        "trajectory": trajectory,
        "checkpoint": checkpoint,
        "selection_indices": selection_indices,
        "restraint_geometry": restraint_geometry,
        "standard_state_correction": standard_state_corr,
    }

    return forwarded | analysis_outputs
================================================
FILE: src/openfe/protocols/openmm_afe/equil_afe_settings.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""Settings class for equilibrium AFE Protocols using OpenMM + OpenMMTools
This module implements the settings necessary to run absolute free
energy calculations using OpenMM.
See Also
--------
openfe.protocols.openmm_afe.AbsoluteSolvationProtocol
TODO
----
* Add support for restraints
"""
import numpy as np
from gufe.settings import (
OpenMMSystemGeneratorFFSettings,
SettingsBaseModel,
ThermoSettings,
)
from pydantic import field_validator
from openfe.protocols.openmm_utils.omm_settings import (
BaseSolvationSettings,
IntegratorSettings,
MDOutputSettings,
MDSimulationSettings,
MultiStateOutputSettings,
MultiStateSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSolvationSettings,
)
from openfe.protocols.restraint_utils.settings import (
BaseRestraintSettings,
BoreschRestraintSettings,
)
class AlchemicalSettings(SettingsBaseModel):
"""
Alchemical settings for Protocols which use the
AbsoluteAlchemicalFactory.
This groups the dispersion correction switch, the choice between
annihilating and decoupling the sterics, and the softcore parameters
of the Lennard-Jones interactions.
"""
# Dispersion correction handling for the alchemical region
disable_alchemical_dispersion_correction: bool = False
"""
If True, the long-range dispersion correction will not
be included for the alchemical region, avoiding the need
to recompute the correction. This can improve performance,
at the cost of accuracy. Default is False.
"""
# Annihilate (True) or decouple (False) the Lennard-Jones interactions
annihilate_sterics: bool = False
"""
If True, sterics (Lennard-Jones) will be annihilated instead
of decoupled. Default is False.
"""
# Softcore parameters: alpha is used together with the a, b and c constants
softcore_alpha: float = 0.5
"""
Alchemical softcore parameter for the Lennard-Jones interactions
(default is 0.5).
The generalized softcore potential formalism introduced by
Pham and Shirts, J. Chem. Phys. 135, 034114 (2011), equation 13,
is used here. The ``softcore_a``, ``softcore_b``, and
``softcore_c`` parameters are used alongside ``softcore_alpha``
to control how the potential is scaled.
"""
softcore_a: float = 1.0
"""
Scaling constant ``a`` in
Eq. 13 from Pham and Shirts, J. Chem. Phys. 135, 034114 (2011).
"""
softcore_b: float = 1.0
"""
Scaling constant ``b`` in
Eq. 13 from Pham and Shirts, J. Chem. Phys. 135, 034114 (2011).
"""
softcore_c: float = 6.0
"""
Scaling constant ``c`` in
Eq. 13 from Pham and Shirts, J. Chem. Phys. 135, 034114 (2011).
"""
class LambdaSettings(SettingsBaseModel):
    """Lambda schedule settings.

    Defines lists of floats to control various aspects of the alchemical
    transformation.

    Notes
    -----
    * In all cases a lambda value of 0 defines the system in state A, whilst
      a value of 1 defines the system in state B. In an absolute transformation,
      state A means a fully interacting ligand without any restraints applied,
      and state B means a fully non-interacting ligand, with optional restraints
      applied.
    * ``lambda_elec``, ``lambda_vdw``, and ``lambda_restraints`` must all be of
      the same length, defining all the windows of the transformation.
    * Each schedule is checked by the field validators of this class: every
      value must lie within [0, 1] and the values must never decrease from
      one window to the next.
    """

    # fmt: off
    lambda_elec: list[float] = [
        0.0, 0.25, 0.5, 0.75, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    ]
    # fmt: on
    """
    List of floats of lambda values for the electrostatics.
    Zero means fully interacting (state A),
    and one means annihilated (state B).
    Length of this list needs to match length of lambda_vdw and
    lambda_restraints.
    """

    # fmt: off
    lambda_vdw: list[float] = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.05, 0.1, 0.2, 0.3, 0.4,
        0.5, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0,
    ]
    # fmt: on
    """
    List of floats of lambda values for the van der Waals.
    Zero means fully interacting (state A) and one means decoupled (state B).
    Length of this list needs to match length of lambda_elec and
    lambda_restraints.
    """

    # fmt: off
    lambda_restraints: list[float] = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    ]
    # fmt: on
    """
    List of floats of lambda values for the restraints.
    Zero means no restraints are applied (state A), and
    one means restraints are fully applied (state B).

    Note: The length of this list needs to match length of lambda_vdw and lambda_elec.
    """

    @field_validator("lambda_elec", "lambda_vdw", "lambda_restraints")
    @classmethod
    def must_be_between_0_and_1(cls, v):
        """Reject any schedule containing a window outside ``[0, 1]``.

        Raises
        ------
        ValueError
          On the first window found outside the closed interval [0, 1];
          the offending value is reported in the message.
        """
        for window in v:
            if not 0 <= window <= 1:
                errmsg = (
                    f"Lambda windows must be between 0 and 1, got a window with value {window}."
                )
                raise ValueError(errmsg)
        return v

    @field_validator("lambda_elec", "lambda_vdw", "lambda_restraints")
    @classmethod
    def must_be_monotonic(cls, v):
        """Reject any schedule whose values decrease between windows.

        Consecutive equal values are accepted (the check is on
        ``np.diff(v) >= 0``), i.e. the schedule must be non-decreasing.

        Raises
        ------
        ValueError
          If any window is smaller than the one preceding it.
        """
        difference = np.diff(v)
        monotonic = np.all(difference >= 0)

        if not monotonic:
            errmsg = (
                "The lambda schedule is not monotonically increasing, "
                f"got the following schedule: {v}."
            )
            raise ValueError(errmsg)

        return v
class ABFEPreEquilOutputSettings(MDOutputSettings):
    """
    Output settings for the ABFE pre-equilibration MD simulations.

    Overrides the defaults of several ``MDOutputSettings`` fields and
    restricts ``output_indices`` to the value ``"all"``.
    """

    output_indices: str = "all"
    """
    Selection string for which part of the system to write coordinates for.
    For now, must be "all".
    """
    equil_nvt_structure: str | None = "equil_nvt_structure.pdb"
    """
    Name of the PDB file containing the system after NVT pre-equilibration.
    Only the atom subset specified by output_indices is saved.
    Default 'equil_nvt_structure.pdb'.
    """
    equil_npt_structure: str | None = "equil_npt_structure.pdb"
    """
    Name of the PDB file containing the system after NPT pre-equilibration.
    Only the atom subset specified by output_indices is saved.
    Default 'equil_npt_structure.pdb'.
    """
    production_trajectory_filename: str | None = "production_equil.xtc"
    """
    Name of the pre-equilibration "production" (i.e. extended NPT)
    trajectory file.
    Only the atom subset specified by output_indices is saved.
    Default `production_equil.xtc`.
    """
    log_output: str | None = "production_equil_simulation.log"
    """
    Filename for writing the pre-equilibration extended NPT MD simulation
    log file. This includes ns/day, timesteps, energies, density, etc.
    Default 'production_equil_simulation.log'
    """

    @field_validator("output_indices")
    @classmethod
    def must_be_all(cls, v):
        """Only accept ``"all"`` as the ``output_indices`` selection.

        Raises
        ------
        ValueError
          If ``output_indices`` is anything other than ``"all"``.
        """
        # Would be better if this was just changed to a Literal
        # but changing types in child classes in pydantic is messy
        if v != "all":
            msg = "output_indices must be all for ABFE pre-equilibration simulations"
            raise ValueError(msg)
        return v
# This subclasses from SettingsBaseModel as it has vacuum_forcefield and
# solvent_forcefield fields, not just a single forcefield_settings field
class AbsoluteSolvationSettings(SettingsBaseModel):
    """
    Configuration object for ``AbsoluteSolvationProtocol``.

    See Also
    --------
    openfe.protocols.openmm_afe.AbsoluteSolvationProtocol
    """

    protocol_repeats: int
    """
    The number of completely independent repeats of the entire sampling
    process. The mean of the repeats defines the final estimate of FE
    difference, while the variance between repeats is used as the uncertainty.
    """

    @field_validator("protocol_repeats")
    @classmethod
    def must_be_positive(cls, v):
        """Require ``protocol_repeats`` to be strictly greater than zero.

        Raises
        ------
        ValueError
          If the value is zero or negative.
        """
        if v <= 0:
            errmsg = f"protocol_repeats must be a positive value, got {v}."
            raise ValueError(errmsg)
        return v

    # Inherited things
    solvent_forcefield_settings: OpenMMSystemGeneratorFFSettings
    """
    Parameters to set up the force field with OpenMM Force Fields
    for the solvent transformation.
    """
    vacuum_forcefield_settings: OpenMMSystemGeneratorFFSettings
    """
    Parameters to set up the force field with OpenMM Force Fields
    for the vacuum transformation.
    """
    thermo_settings: ThermoSettings
    """Settings for thermodynamic parameters"""

    solvation_settings: OpenMMSolvationSettings
    """Settings for solvating the system."""

    # Alchemical settings
    alchemical_settings: AlchemicalSettings
    """
    Alchemical protocol settings.
    """
    lambda_settings: LambdaSettings
    """
    Settings for controlling the lambda schedule for the different components
    (vdw, elec, restraints).
    """

    # MD Engine things
    vacuum_engine_settings: OpenMMEngineSettings
    """
    Settings specific to the OpenMM engine, such as the compute platform
    for the vacuum transformation.
    """
    solvent_engine_settings: OpenMMEngineSettings
    """
    Settings specific to the OpenMM engine, such as the compute platform
    for the solvent transformation.
    """

    # Sampling State defining things
    integrator_settings: IntegratorSettings
    """
    Settings for controlling the integrator, such as the timestep and
    barostat settings.
    """

    # Simulation run settings
    vacuum_equil_simulation_settings: MDSimulationSettings
    """
    Pre-alchemical vacuum simulation control settings.

    Notes
    -----
    The `NVT` equilibration should be set to 0 * unit.nanosecond
    as it will not be run.
    """
    vacuum_simulation_settings: MultiStateSimulationSettings
    """
    Simulation control settings, including simulation lengths
    for the vacuum transformation.
    """
    solvent_equil_simulation_settings: MDSimulationSettings
    """
    Pre-alchemical solvent simulation control settings.
    """
    solvent_simulation_settings: MultiStateSimulationSettings
    """
    Simulation control settings, including simulation lengths
    for the solvent transformation.
    """
    vacuum_equil_output_settings: MDOutputSettings
    """
    Simulation output settings for the vacuum non-alchemical equilibration.
    """
    vacuum_output_settings: MultiStateOutputSettings
    """
    Simulation output settings for the vacuum transformation.
    """
    solvent_equil_output_settings: MDOutputSettings
    """
    Simulation output settings for the solvent non-alchemical equilibration.
    """
    solvent_output_settings: MultiStateOutputSettings
    """
    Simulation output settings for the solvent transformation.
    """
    partial_charge_settings: OpenFFPartialChargeSettings
    """
    Settings for controlling how to assign partial charges,
    including the partial charge assignment method, and the
    number of conformers used to generate the partial charges.
    """
class AbsoluteBindingSettings(SettingsBaseModel):
    """
    Configuration object for ``AbsoluteBindingProtocol``.

    See Also
    --------
    openfe.protocols.openmm_afe.AbsoluteBindingProtocol
    """

    protocol_repeats: int
    """
    The number of completely independent repeats of the entire sampling
    process. The mean of the repeats defines the final estimate of FE
    difference, while the variance between repeats is used as the uncertainty.
    """

    @field_validator("protocol_repeats")
    @classmethod
    def must_be_positive(cls, v):
        """Require ``protocol_repeats`` to be strictly greater than zero.

        Raises
        ------
        ValueError
          If the value is zero or negative.
        """
        if v <= 0:
            errmsg = f"protocol_repeats must be a positive value, got {v}."
            raise ValueError(errmsg)
        return v

    forcefield_settings: OpenMMSystemGeneratorFFSettings
    """Parameters to set up the force field with OpenMM Force Fields"""

    thermo_settings: ThermoSettings
    """Settings for thermodynamic parameters"""

    solvent_solvation_settings: OpenMMSolvationSettings
    """Settings for solvating the system in the solvent."""

    complex_solvation_settings: OpenMMSolvationSettings
    """Settings for solvating the system in the complex."""

    # Alchemical settings
    alchemical_settings: AlchemicalSettings
    """
    Alchemical protocol settings.
    """
    complex_lambda_settings: LambdaSettings
    """
    Settings for controlling the complex transformation leg
    lambda schedule for the different components (vdw, elec, restraints).
    """
    solvent_lambda_settings: LambdaSettings
    """
    Settings for controlling the solvent transformation leg
    lambda schedule for the different components (vdw, elec, restraints).

    Notes
    -----
    * The `restraints` entry of the lambda settings will be ignored in the
      solvent leg.
    """

    # MD Engine things
    engine_settings: OpenMMEngineSettings
    """
    Settings specific to the OpenMM engine, such as the compute platform.
    """

    # Sampling State defining things
    solvent_integrator_settings: IntegratorSettings
    """
    Settings for controlling the integrator, such as the timestep and
    barostat settings in the solvent.
    """
    complex_integrator_settings: IntegratorSettings
    """
    Settings for controlling the integrator, such as the timestep and
    barostat settings in the complex.
    """

    # Simulation run settings
    complex_equil_simulation_settings: MDSimulationSettings
    """
    Pre-alchemical complex simulation control settings.
    """
    complex_simulation_settings: MultiStateSimulationSettings
    """
    Simulation control settings, including simulation lengths
    for the complex transformation.
    """
    solvent_equil_simulation_settings: MDSimulationSettings
    """
    Pre-alchemical solvent simulation control settings.
    """
    solvent_simulation_settings: MultiStateSimulationSettings
    """
    Simulation control settings, including simulation lengths
    for the solvent transformation.
    """

    # Simulation output settings
    complex_equil_output_settings: ABFEPreEquilOutputSettings
    """
    Simulation output settings for the complex non-alchemical equilibration.
    """
    complex_output_settings: MultiStateOutputSettings
    """
    Simulation output settings for the complex transformation.
    """
    solvent_equil_output_settings: ABFEPreEquilOutputSettings
    """
    Simulation output settings for the solvent non-alchemical equilibration.
    """
    solvent_output_settings: MultiStateOutputSettings
    """
    Simulation output settings for the solvent transformation.
    """
    partial_charge_settings: OpenFFPartialChargeSettings
    """
    Settings for controlling how to assign partial charges,
    including the partial charge assignment method, and the
    number of conformers used to generate the partial charges.
    """
    restraint_settings: BaseRestraintSettings
    """
    Settings controlling how restraints are added to the system in the
    complex simulation.
    """
================================================
FILE: src/openfe/protocols/openmm_afe/equil_binding_afe_method.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""OpenMM Equilibrium Binding AFE Protocol --- :mod:`openfe.protocols.openmm_afe.equil_binding_afe_method`
==========================================================================================================
This module implements the necessary methodology tooling to calculate an
absolute binding free energy using OpenMM tools and one of the following
alchemical sampling methods:
* Hamiltonian Replica Exchange
* Self-adjusted mixture sampling
* Independent window sampling
Current limitations
-------------------
* Alchemical species with a net charge are not currently supported.
* Disappearing molecules are only allowed in state A.
* Only small molecules are allowed to act as alchemical molecules.
Acknowledgements
----------------
* This Protocol re-implements components from
  `Yank <https://github.com/choderalab/yank>`_.
"""
import logging
import uuid
import warnings
from collections import defaultdict
from typing import Any, Iterable
import gufe
from gufe import (
ChemicalSystem,
ProteinComponent,
ProteinMembraneComponent,
SmallMoleculeComponent,
SolventComponent,
settings,
)
from openff.units import unit as offunit
from openfe.due import Doi, due
from openfe.protocols.openmm_afe.equil_afe_settings import (
ABFEPreEquilOutputSettings,
AbsoluteBindingSettings,
AlchemicalSettings,
BoreschRestraintSettings,
IntegratorSettings,
LambdaSettings,
MDSimulationSettings,
MultiStateOutputSettings,
MultiStateSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSolvationSettings,
)
from openfe.protocols.openmm_utils import (
settings_validation,
system_validation,
)
from .abfe_units import (
ABFEComplexAnalysisUnit,
ABFEComplexSetupUnit,
ABFEComplexSimUnit,
ABFESolventAnalysisUnit,
ABFESolventSetupUnit,
ABFESolventSimUnit,
)
from .afe_protocol_results import AbsoluteBindingProtocolResult
# Register module-level citations (via the ``openfe.due`` helper) for the
# software this Protocol builds on. Each call attaches a DOI to this
# module's import path; ``cite_module=True`` is passed for every entry.
due.cite(
    Doi("10.5281/zenodo.596504"),
    description="Yank",
    path="openfe.protocols.openmm_afe.equil_binding_afe_method",
    cite_module=True,
)

due.cite(
    Doi("10.5281/zenodo.596622"),
    description="OpenMMTools",
    path="openfe.protocols.openmm_afe.equil_binding_afe_method",
    cite_module=True,
)

due.cite(
    Doi("10.1371/journal.pcbi.1005659"),
    description="OpenMM",
    path="openfe.protocols.openmm_afe.equil_binding_afe_method",
    cite_module=True,
)


# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
class AbsoluteBindingProtocol(gufe.Protocol):
    """
    Absolute binding free energy calculations using OpenMM and OpenMMTools.

    See Also
    --------
    :mod:`openfe.protocols`
    :class:`openfe.protocols.openmm_afe.AbsoluteBindingSettings`
    :class:`openfe.protocols.openmm_afe.AbsoluteBindingProtocolResult`
    :class:`openfe.protocols.openmm_afe.AbsoluteBindingSolventUnit`
    :class:`openfe.protocols.openmm_afe.AbsoluteBindingComplexUnit`
    """

    result_cls = AbsoluteBindingProtocolResult
    _settings_cls = AbsoluteBindingSettings
    _settings: AbsoluteBindingSettings

    @classmethod
    def _default_settings(cls):
        """Return a set of default settings for creating this Protocol.

        These settings are intended as a suitable starting point for creating
        an instance of this protocol. It is recommended, however that care is
        taken to inspect and customize these before performing a Protocol.

        Returns
        -------
        Settings
          a set of default settings
        """
        # fmt: off
        return AbsoluteBindingSettings(
            protocol_repeats=3,
            forcefield_settings=settings.OpenMMSystemGeneratorFFSettings(),
            thermo_settings=settings.ThermoSettings(
                temperature=298.15 * offunit.kelvin,
                pressure=1 * offunit.bar,
            ),
            alchemical_settings=AlchemicalSettings(),
            solvent_lambda_settings=LambdaSettings(
                lambda_elec=[
                    0.0, 0.25, 0.5, 0.75, 1.0,
                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                ],
                lambda_vdw=[
                    0.0, 0.0, 0.0, 0.0, 0.0,
                    0.12, 0.24, 0.36, 0.48, 0.6, 0.7, 0.77, 0.85, 1.0
                ],
                lambda_restraints=[
                    0.0, 0.0, 0.0, 0.0, 0.0,
                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
                ],
            ),
            complex_lambda_settings=LambdaSettings(
                lambda_elec=[
                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                    0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.00, 1.0, 1.00, 1.0, 1.00, 1.0, 1.00, 1.0
                ],
                lambda_vdw=[
                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                    0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                    0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0
                ],
                lambda_restraints=[
                    0.0, 0.2, 0.4, 0.6, 0.8, 1.0,
                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                    1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.00, 1.0, 1.00, 1.0, 1.00, 1.0, 1.00, 1.0
                ],
            ),
            partial_charge_settings=OpenFFPartialChargeSettings(),
            complex_solvation_settings=OpenMMSolvationSettings(
                solvent_padding=1.0 * offunit.nanometer,
            ),
            solvent_solvation_settings=OpenMMSolvationSettings(),
            engine_settings=OpenMMEngineSettings(),
            solvent_integrator_settings=IntegratorSettings(),
            complex_integrator_settings=IntegratorSettings(),
            restraint_settings=BoreschRestraintSettings(),
            solvent_equil_simulation_settings=MDSimulationSettings(
                equilibration_length_nvt=0.1 * offunit.nanosecond,
                equilibration_length=0.2 * offunit.nanosecond,
                production_length=0.5 * offunit.nanosecond,
            ),
            solvent_equil_output_settings=ABFEPreEquilOutputSettings(),
            solvent_simulation_settings=MultiStateSimulationSettings(
                n_replicas=14,
                equilibration_length=1.0 * offunit.nanosecond,
                production_length=10.0 * offunit.nanosecond,
            ),
            solvent_output_settings=MultiStateOutputSettings(
                output_structure="alchemical_system.pdb",
                output_filename="solvent.nc",
                checkpoint_storage_filename="solvent_checkpoint.nc",
            ),
            complex_equil_simulation_settings=MDSimulationSettings(
                equilibration_length_nvt=0.25 * offunit.nanosecond,
                equilibration_length=0.5 * offunit.nanosecond,
                production_length=5.0 * offunit.nanosecond,
            ),
            complex_equil_output_settings=ABFEPreEquilOutputSettings(),
            complex_simulation_settings=MultiStateSimulationSettings(
                n_replicas=30,
                equilibration_length=1 * offunit.nanosecond,
                production_length=10.0 * offunit.nanosecond,
            ),
            complex_output_settings=MultiStateOutputSettings(
                output_structure="alchemical_system.pdb",
                output_filename="complex.nc",
                checkpoint_storage_filename="complex_checkpoint.nc",
            ),
        )
        # fmt: on

    @classmethod
    def _adaptive_settings(
        cls,
        stateA: ChemicalSystem,
        stateB: ChemicalSystem,
        initial_settings: None | AbsoluteBindingSettings = None,
    ) -> AbsoluteBindingSettings:
        """
        Get the recommended OpenFE settings for this Protocol based on the input states involved in the
        transformation.

        These are intended as a suitable starting point, which can be further
        customized before creating a Protocol.

        Parameters
        ----------
        stateA : ChemicalSystem
          The initial state of the transformation.
        stateB : ChemicalSystem
          The final state of the transformation.
        initial_settings : None | AbsoluteBindingSettings, optional
          Initial settings to adapt. If None, default settings are used.

        Returns
        -------
        AbsoluteBindingSettings
          The recommended settings for this protocol based on the input states.
        """
        # use initial settings or default settings; a deep copy is taken so
        # the caller's ``initial_settings`` object is never modified
        if initial_settings is not None:
            protocol_settings = initial_settings.model_copy(deep=True)
        else:
            protocol_settings = cls.default_settings()

        # adapt the complex-leg barostat when state A holds a membrane protein
        if stateA.contains(ProteinMembraneComponent):
            protocol_settings.complex_integrator_settings.barostat = "MonteCarloMembraneBarostat"

        return protocol_settings

    @staticmethod
    def _validate_endstates(
        stateA: ChemicalSystem,
        stateB: ChemicalSystem,
    ) -> None:
        """
        A binding transformation is defined (in terms of gufe components)
        as starting from one or more ligands with one protein and solvent,
        that then ends up in a state with one less ligand.

        Parameters
        ----------
        stateA : ChemicalSystem
          The chemical system of end state A
        stateB : ChemicalSystem
          The chemical system of end state B

        Raises
        ------
        ValueError
          If stateA & stateB do not both contain a ProteinComponent.
          If stateA & stateB do not both contain a SolventComponent.
          If stateA does not have exactly one unique Component.
          If the stateA unique Component is not a SmallMoleculeComponent.
          If the alchemical species is charged.
          If stateB contains any unique Components.
        """
        if not (stateA.contains(ProteinComponent) and stateB.contains(ProteinComponent)):
            errmsg = "No ProteinComponent found"
            raise ValueError(errmsg)

        if not (stateA.contains(SolventComponent) and stateB.contains(SolventComponent)):
            errmsg = "No SolventComponent found"
            raise ValueError(errmsg)

        # Needs gufe 1.3
        # diff[0] holds the components unique to state A,
        # diff[1] those unique to state B
        diff = stateA.component_diff(stateB)
        if len(diff[0]) != 1:
            errmsg = (
                "Only one alchemical species is supported. "
                f"Number of unique components found in stateA: {len(diff[0])}."
            )
            raise ValueError(errmsg)

        # Safe to index now that exactly one unique component is guaranteed
        alchem_comp = diff[0][0]

        if not isinstance(alchem_comp, SmallMoleculeComponent):
            errmsg = (
                "Only disappearing small molecule components "
                "are supported by this protocol. "
                f"Found a {type(alchem_comp)}"
            )
            raise ValueError(errmsg)

        # Check that the state A unique isn't charged
        if alchem_comp.total_charge != 0:
            errmsg = (
                "Charged alchemical molecules are not currently "
                "supported for binding free energies. "
                f"Molecule total charge: {alchem_comp.total_charge}."
            )
            raise ValueError(errmsg)

        # If there are any alchemical Components in state B
        if len(diff[1]) > 0:
            errmsg = "Components appearing in state B are not currently supported"
            raise ValueError(errmsg)

    @staticmethod
    def _validate_lambda_schedule(
        lambda_settings: LambdaSettings,
        simulation_settings: MultiStateSimulationSettings,
    ) -> None:
        """
        Checks that the lambda schedule is set up correctly.

        Parameters
        ----------
        lambda_settings : LambdaSettings
          the lambda schedule Settings
        simulation_settings : MultiStateSimulationSettings
          the settings for either the complex or solvent phase

        Raises
        ------
        ValueError
          If the number of lambda windows differs for electrostatics, sterics,
          and restraints.
          If the number of replicas does not match the number of lambda windows.
          If there are states with naked charges.
        """
        lambda_elec = lambda_settings.lambda_elec
        lambda_vdw = lambda_settings.lambda_vdw
        lambda_restraints = lambda_settings.lambda_restraints
        n_replicas = simulation_settings.n_replicas

        # Ensure that all lambda components have an equal number of windows
        n_windows = len(lambda_vdw)
        if not (len(lambda_elec) == n_windows and len(lambda_restraints) == n_windows):
            errmsg = (
                "Components elec, vdw, and restraints must have equal amount"
                f" of lambda windows. Got {len(lambda_elec)} elec lambda"
                f" windows, {len(lambda_vdw)} vdw lambda windows, and"
                f" {len(lambda_restraints)} restraints lambda windows."
            )
            raise ValueError(errmsg)

        # Ensure that number of overall lambda windows matches number of lambda
        # windows for individual components
        if n_replicas != n_windows:
            errmsg = (
                f"Number of replicas {n_replicas} does not equal the"
                f" number of lambda windows {len(lambda_vdw)}"
            )
            raise ValueError(errmsg)

        # Check that no lambda window has naked charges, i.e. electrostatics
        # still (partially) on while the LJ interactions are fully off
        for inx, (elec, vdw) in enumerate(zip(lambda_elec, lambda_vdw)):
            if elec < 1 and vdw == 1:
                errmsg = (
                    "There are states along this lambda schedule "
                    "where there are atoms with charges but no LJ "
                    f"interactions: lambda {inx}: "
                    f"elec {elec} vdW {vdw}"
                )
                raise ValueError(errmsg)

    def _validate(
        self,
        *,
        stateA: ChemicalSystem,
        stateB: ChemicalSystem,
        mapping: gufe.ComponentMapping | list[gufe.ComponentMapping] | None = None,
        extends: gufe.ProtocolDAGResult | None = None,
    ):
        """
        Validate the inputs and the Protocol settings before DAG creation.

        Parameters
        ----------
        stateA : ChemicalSystem
          The chemical system of end state A
        stateB : ChemicalSystem
          The chemical system of end state B
        mapping : ComponentMapping | list[ComponentMapping] | None
          Not used by this Protocol; a warning is emitted if one is passed.
        extends : ProtocolDAGResult | None
          Must be None, extending simulations is not supported.

        Raises
        ------
        ValueError
          If ``extends`` is not None.
          If the end states or either lambda schedule are invalid (see
          ``_validate_endstates`` and ``_validate_lambda_schedule``).

        Notes
        -----
        The ``system_validation`` and ``settings_validation`` helpers called
        here are defined elsewhere; they are expected to raise on invalid
        input.
        """
        # Check we're not extending
        if extends is not None:
            # This technically should be NotImplementedError
            # but gufe.Protocol.validate calls `_validate` wrapped around an
            # except for NotImplementedError, so we can't raise it here
            raise ValueError("Can't extend simulations yet")

        # Check we're not using a mapping, since we're not doing anything with it
        if mapping is not None:
            wmsg = "A mapping was passed but is not used by this Protocol."
            warnings.warn(wmsg)

        # Validate the end states & alchemical components
        system_validation.validate_chemical_system(stateA)
        system_validation.validate_chemical_system(stateB)
        self._validate_endstates(stateA, stateB)

        # Validate the complex lambda schedule
        self._validate_lambda_schedule(
            self.settings.complex_lambda_settings,
            self.settings.complex_simulation_settings,
        )

        # If the complex restraints schedule is all zero, it might be bad
        # but we don't disallow it.
        if all(i == 0.0 for i in self.settings.complex_lambda_settings.lambda_restraints):
            wmsg = (
                "No restraints are being applied in the complex phase, "
                "this will likely lead to problematic results."
            )
            warnings.warn(wmsg)

        # Validate the solvent lambda schedule
        self._validate_lambda_schedule(
            self.settings.solvent_lambda_settings,
            self.settings.solvent_simulation_settings,
        )

        # If the solvent restraints schedule is not all zero, it was likely
        # copied from the complex schedule. In this case we just ignore
        # the values and let the user know.
        # P.S. we don't need to change the settings at this point
        # the list gets popped out later in the SolventUnit, because we
        # don't have a restraint parameter state.
        if any(i != 0.0 for i in self.settings.solvent_lambda_settings.lambda_restraints):
            wmsg = (
                "There is an attempt to add restraints in the solvent "
                "phase. This protocol does not apply restraints in the "
                "solvent phase. These restraint lambda values will be ignored."
            )
            warnings.warn(wmsg)

        # Check nonbond & solvent compatibility
        nonbonded_method = self.settings.forcefield_settings.nonbonded_method
        # Use the more complete system validation solvent checks
        system_validation.validate_solvent(stateA, nonbonded_method)

        # Validate the barostat used in combination with the protein component
        system_validation.validate_barostat(
            stateA, self.settings.complex_integrator_settings.barostat
        )

        # Validate solvation settings
        settings_validation.validate_openmm_solvation_settings(
            self.settings.solvent_solvation_settings
        )
        settings_validation.validate_openmm_solvation_settings(
            self.settings.complex_solvation_settings
        )

        # Validate integrator things
        # We validate the timestep for both the complex & solvent settings
        settings_validation.validate_timestep(
            self.settings.forcefield_settings.hydrogen_mass,
            self.settings.complex_integrator_settings.timestep,
        )
        settings_validation.validate_timestep(
            self.settings.forcefield_settings.hydrogen_mass,
            self.settings.solvent_integrator_settings.timestep,
        )

    def _create(
        self,
        stateA: ChemicalSystem,
        stateB: ChemicalSystem,
        mapping: gufe.ComponentMapping | list[gufe.ComponentMapping] | None = None,
        extends: gufe.ProtocolDAGResult | None = None,
    ) -> list[gufe.ProtocolUnit]:
        """
        Build the ProtocolUnits for the solvent and complex legs.

        For every repeat of each leg a setup, a simulation and an analysis
        unit are created; the three share a freshly generated ``repeat_id``
        and the simulation/analysis units take the earlier units as inputs.

        Parameters
        ----------
        stateA : ChemicalSystem
          The chemical system of end state A
        stateB : ChemicalSystem
          The chemical system of end state B
        mapping : ComponentMapping | list[ComponentMapping] | None
          Only forwarded to ``validate``.
        extends : ProtocolDAGResult | None
          Only forwarded to ``validate``.

        Returns
        -------
        list[gufe.ProtocolUnit]
          All solvent leg units followed by all complex leg units.
        """
        # Validate inputs
        self.validate(stateA=stateA, stateB=stateB, mapping=mapping, extends=extends)

        # Get the alchemical components
        alchem_comps = system_validation.get_alchemical_components(
            stateA,
            stateB,
        )

        # Get the name of the alchemical species
        alchname = alchem_comps["stateA"][0].name

        # Unit classes to instantiate for each leg and stage
        unit_classes: dict[str, dict[str, type[gufe.ProtocolUnit]]] = {
            "solvent": {
                "setup": ABFESolventSetupUnit,
                "simulation": ABFESolventSimUnit,
                "analysis": ABFESolventAnalysisUnit,
            },
            "complex": {
                "setup": ABFEComplexSetupUnit,
                "simulation": ABFEComplexSimUnit,
                "analysis": ABFEComplexAnalysisUnit,
            },
        }

        protocol_units: dict[str, list[gufe.ProtocolUnit]] = {"solvent": [], "complex": []}

        for phase in ["solvent", "complex"]:
            for i in range(self.settings.protocol_repeats):
                # One id shared by the setup/simulation/analysis units of
                # this repeat; ``_gather`` groups results by it
                repeat_id = int(uuid.uuid4())

                setup = unit_classes[phase]["setup"](
                    protocol=self,
                    stateA=stateA,
                    stateB=stateB,
                    alchemical_components=alchem_comps,
                    generation=0,
                    repeat_id=repeat_id,
                    name=f"ABFE Setup: {alchname} {phase} leg: repeat {i} generation 0",
                )

                simulation = unit_classes[phase]["simulation"](
                    protocol=self,
                    # only need state A & alchem comps
                    stateA=stateA,
                    alchemical_components=alchem_comps,
                    setup_results=setup,
                    generation=0,
                    repeat_id=repeat_id,
                    name=f"ABFE Simulation: {alchname} {phase} leg: repeat {i} generation 0",
                )

                analysis = unit_classes[phase]["analysis"](
                    protocol=self,
                    setup_results=setup,
                    simulation_results=simulation,
                    generation=0,
                    repeat_id=repeat_id,
                    name=f"ABFE Analysis: {alchname} {phase} leg, repeat {i} generation 0",
                )

                protocol_units[phase] += [setup, simulation, analysis]

        return protocol_units["solvent"] + protocol_units["complex"]

    def _gather(
        self, protocol_dag_results: Iterable[gufe.ProtocolDAGResult]
    ) -> dict[str, dict[str, Any]]:
        """
        Collect the successful analysis unit results of each leg.

        Unit results whose name does not contain ``"Analysis"`` or which
        are not ``ok()`` are skipped. A result is assigned to the solvent
        leg when its ``simtype`` output equals ``"solvent"`` and to the
        complex leg otherwise.

        Parameters
        ----------
        protocol_dag_results : Iterable[gufe.ProtocolDAGResult]
          The DAG results to gather unit results from.

        Returns
        -------
        dict[str, dict[str, Any]]
          A dictionary with the keys ``"solvent"`` and ``"complex"``, each
          mapping the stringified ``repeat_id`` to the list of matching
          unit results sorted by their ``generation`` output.
        """
        # result units will have a repeat_id and generation
        # first group according to repeat_id
        unsorted_solvent_repeats = defaultdict(list)
        unsorted_complex_repeats = defaultdict(list)
        for d in protocol_dag_results:
            pu: gufe.ProtocolUnitResult
            for pu in d.protocol_unit_results:
                if ("Analysis" not in pu.name) or (not pu.ok()):
                    continue
                if pu.outputs["simtype"] == "solvent":
                    unsorted_solvent_repeats[pu.outputs["repeat_id"]].append(pu)
                else:
                    unsorted_complex_repeats[pu.outputs["repeat_id"]].append(pu)

        repeats: dict[str, dict[str, list[gufe.ProtocolUnitResult]]] = {
            "solvent": {},
            "complex": {},
        }
        # then order the results of every repeat by generation
        for k, v in unsorted_solvent_repeats.items():
            repeats["solvent"][str(k)] = sorted(v, key=lambda x: x.outputs["generation"])

        for k, v in unsorted_complex_repeats.items():
            repeats["complex"][str(k)] = sorted(v, key=lambda x: x.outputs["generation"])
        return repeats
================================================
FILE: src/openfe/protocols/openmm_afe/equil_solvation_afe_method.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""OpenMM Equilibrium Solvation AFE Protocol --- :mod:`openfe.protocols.openmm_afe.equil_solvation_afe_method`
===============================================================================================================
This module implements the necessary methodology tooling to calculate an
absolute solvation free energy using OpenMM tools and one of the following
alchemical sampling methods:
* Hamiltonian Replica Exchange
* Self-adjusted mixture sampling
* Independent window sampling
Current limitations
-------------------
* Alchemical species with a net charge are not currently supported.
* Disappearing molecules are only allowed in state A. Support for
appearing molecules will be added in due course.
* Only small molecules are allowed to act as alchemical molecules.
Alchemically changing protein or solvent components would induce
perturbations which are too large to be handled by this Protocol.
Acknowledgements
----------------
* Originally based on hydration.py in
  `espaloma_charge <https://github.com/choderalab/espaloma_charge>`_
"""
import logging
import uuid
import warnings
from collections import defaultdict
from typing import Any, Iterable, Optional, Union
import gufe
import numpy as np
from gufe import (
ChemicalSystem,
ProteinComponent,
SmallMoleculeComponent,
SolventComponent,
settings,
)
from openff.units import unit as offunit
from openfe.due import Doi, due
from openfe.protocols.openmm_afe.equil_afe_settings import (
AbsoluteSolvationSettings,
AlchemicalSettings,
IntegratorSettings,
LambdaSettings,
MDOutputSettings,
MDSimulationSettings,
MultiStateOutputSettings,
MultiStateSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSolvationSettings,
)
from ..openmm_utils import settings_validation, system_validation
from .afe_protocol_results import AbsoluteSolvationProtocolResult
from .ahfe_units import (
AHFESolventAnalysisUnit,
AHFESolventSetupUnit,
AHFESolventSimUnit,
AHFEVacuumAnalysisUnit,
AHFEVacuumSetupUnit,
AHFEVacuumSimUnit,
)
# Register module-level citations (via the ``openfe.due`` helper) for the
# software this Protocol builds on. Each call attaches a DOI to this
# module's import path; ``cite_module=True`` is passed for every entry.
due.cite(
    Doi("10.5281/zenodo.596504"),
    description="Yank",
    path="openfe.protocols.openmm_afe.equil_solvation_afe_method",
    cite_module=True,
)

due.cite(
    Doi("10.48550/ARXIV.2302.06758"),
    description="EspalomaCharge",
    path="openfe.protocols.openmm_afe.equil_solvation_afe_method",
    cite_module=True,
)

due.cite(
    Doi("10.5281/zenodo.596622"),
    description="OpenMMTools",
    path="openfe.protocols.openmm_afe.equil_solvation_afe_method",
    cite_module=True,
)

due.cite(
    Doi("10.1371/journal.pcbi.1005659"),
    description="OpenMM",
    path="openfe.protocols.openmm_afe.equil_solvation_afe_method",
    cite_module=True,
)


# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
class AbsoluteSolvationProtocol(gufe.Protocol):
"""
Absolute solvation free energy calculations using OpenMM and OpenMMTools.
See Also
--------
:mod:`openfe.protocols`
:class:`openfe.protocols.openmm_afe.AbsoluteSolvationSettings`
:class:`openfe.protocols.openmm_afe.AbsoluteSolvationProtocolResult`
:class:`openfe.protocols.openmm_afe.AbsoluteSolvationVacuumUnit`
:class:`openfe.protocols.openmm_afe.AbsoluteSolvationSolventUnit`
"""
result_cls = AbsoluteSolvationProtocolResult
_settings_cls = AbsoluteSolvationSettings
_settings: AbsoluteSolvationSettings
@classmethod
def _default_settings(cls):
    """Build the default settings for this Protocol.

    The returned settings are intended as a reasonable starting point for
    creating an instance of this Protocol. It is nevertheless recommended
    to inspect and customize them before running any calculation.

    Returns
    -------
    Settings
        a set of default settings
    """
    # 14-window schedule: lambda_elec changes over the first five windows
    # while lambda_vdw stays at 0.0, then lambda_vdw changes over the
    # remaining windows. lambda_restraints is 0.0 throughout.
    lambda_schedule = LambdaSettings(
        lambda_elec=[
            0.0, 0.25, 0.5, 0.75, 1.0, 1.0, 1.0,
            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        lambda_vdw=[
            0.0, 0.0, 0.0, 0.0, 0.0, 0.12, 0.24,
            0.36, 0.48, 0.6, 0.7, 0.77, 0.85, 1.0],
        lambda_restraints=[
            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
            0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    )  # fmt: skip

    # Settings shared by both legs of the cycle
    shared = dict(
        protocol_repeats=3,
        thermo_settings=settings.ThermoSettings(
            temperature=298.15 * offunit.kelvin,
            pressure=1 * offunit.bar,
        ),
        alchemical_settings=AlchemicalSettings(),
        lambda_settings=lambda_schedule,
        partial_charge_settings=OpenFFPartialChargeSettings(),
        solvation_settings=OpenMMSolvationSettings(),
        integrator_settings=IntegratorSettings(),
    )

    # Solvent leg: pre-alchemical equilibration MD + multistate sampling
    solvent_leg = dict(
        solvent_forcefield_settings=settings.OpenMMSystemGeneratorFFSettings(),
        solvent_engine_settings=OpenMMEngineSettings(),
        solvent_equil_simulation_settings=MDSimulationSettings(
            equilibration_length_nvt=0.1 * offunit.nanosecond,
            equilibration_length=0.2 * offunit.nanosecond,
            production_length=0.5 * offunit.nanosecond,
        ),
        solvent_equil_output_settings=MDOutputSettings(
            equil_nvt_structure="equil_nvt_structure.pdb",
            equil_npt_structure="equil_npt_structure.pdb",
            production_trajectory_filename="production_equil.xtc",
            log_output="equil_simulation.log",
        ),
        solvent_simulation_settings=MultiStateSimulationSettings(
            n_replicas=14,
            equilibration_length=1.0 * offunit.nanosecond,
            production_length=10.0 * offunit.nanosecond,
        ),
        solvent_output_settings=MultiStateOutputSettings(
            output_filename="solvent.nc",
            checkpoint_storage_filename="solvent_checkpoint.nc",
        ),
    )

    # Vacuum leg: no cutoff and no NVT equilibration stage
    vacuum_leg = dict(
        vacuum_forcefield_settings=settings.OpenMMSystemGeneratorFFSettings(
            nonbonded_method="nocutoff",
        ),
        vacuum_engine_settings=OpenMMEngineSettings(),
        vacuum_equil_simulation_settings=MDSimulationSettings(
            equilibration_length_nvt=None,
            equilibration_length=0.2 * offunit.nanosecond,
            production_length=0.5 * offunit.nanosecond,
        ),
        vacuum_equil_output_settings=MDOutputSettings(
            equil_nvt_structure=None,
            equil_npt_structure="equil_structure.pdb",
            production_trajectory_filename="production_equil.xtc",
            log_output="equil_simulation.log",
        ),
        vacuum_simulation_settings=MultiStateSimulationSettings(
            n_replicas=14,
            equilibration_length=0.5 * offunit.nanosecond,
            production_length=2.0 * offunit.nanosecond,
        ),
        vacuum_output_settings=MultiStateOutputSettings(
            output_filename="vacuum.nc",
            checkpoint_storage_filename="vacuum_checkpoint.nc",
        ),
    )

    return AbsoluteSolvationSettings(**shared, **solvent_leg, **vacuum_leg)
@staticmethod
def _validate_endstates(
    stateA: ChemicalSystem,
    stateB: ChemicalSystem,
) -> None:
    """
    Check that the two end states describe a supported solvation
    transformation.

    In terms of gufe components, a solvent transformation starts from one
    or more ligands in solvent and ends in a state with one less ligand.
    No protein components are allowed.

    Parameters
    ----------
    stateA : ChemicalSystem
      The chemical system of end state A
    stateB : ChemicalSystem
      The chemical system of end state B

    Raises
    ------
    ValueError
      If stateA or stateB contains a ProteinComponent.
      If there is no SolventComponent in either stateA or stateB.
      If the number of components unique to state A is not exactly one.
      If the component unique to state A is not a SmallMoleculeComponent.
      If the component unique to state A has a non-zero total charge.
      If there are components unique to state B.

    Notes
    -----
    * Currently doesn't support alchemical components in state B.
    * Currently doesn't support alchemical components which are not
      SmallMoleculeComponents.
    * Currently doesn't support more than one alchemical component
      being desolvated.
    * Currently doesn't support charged alchemical components.
    * Solvent must always be present in both end states.
    """
    end_states = (stateA, stateB)

    # Proteins are never allowed
    if any(state.contains(ProteinComponent) for state in end_states):
        raise ValueError(
            "Protein components are not allowed for absolute solvation free energies."
        )

    # Both end states must be solvated
    if not all(state.contains(SolventComponent) for state in end_states):
        raise ValueError("No SolventComponent found in stateA and/or stateB")

    # Components unique to each end state are the alchemical ones
    unique_components = stateA.component_diff(stateB)
    unique_to_A = unique_components[0]

    # Exactly one component may disappear
    n_unique_to_A = len(unique_to_A)
    if n_unique_to_A != 1:
        raise ValueError(
            "Only one alchemical species is supported "
            "for absolute solvation free energies. "
            f"Number of unique components found in stateA: {n_unique_to_A}."
        )

    # ... it has to be a small molecule ...
    alchemical = unique_to_A[0]
    if not isinstance(alchemical, SmallMoleculeComponent):
        raise ValueError(
            "Only disappearing SmallMoleculeComponents "
            "are supported by this protocol. "
            f"Found a {type(alchemical)}"
        )

    # ... and it has to be neutral
    charge = alchemical.total_charge
    if charge != 0:
        raise ValueError(
            "Charged alchemical molecules are not currently "
            "supported for solvation free energies. "
            f"Molecule total charge: {charge}."
        )

    # Nothing may appear in state B
    if len(unique_components[1]) > 0:
        raise ValueError("Components appearing in state B are not currently supported")
@staticmethod
def _validate_lambda_schedule(
lambda_settings: LambdaSettings,
simulation_settings: MultiStateSimulationSettings,
) -> None:
"""
Checks that the lambda schedule is set up correctly.
Parameters
----------
lambda_settings : LambdaSettings
the lambda schedule Settings
simulation_settings : MultiStateSimulationSettings
the settings for either the vacuum or solvent phase
Raises
------
ValueError
If the number of lambda windows differs for electrostatics and sterics.
If the number of replicas does not match the number of lambda windows.
If there are states with naked charges.
Warnings
If there are non-zero values for restraints (lambda_restraints).
"""
lambda_elec = lambda_settings.lambda_elec
lambda_vdw = lambda_settings.lambda_vdw
lambda_restraints = lambda_settings.lambda_restraints
n_replicas = simulation_settings.n_replicas
# Ensure that all lambda components have equal amount of windows
lambda_components = [lambda_vdw, lambda_elec, lambda_restraints]
it = iter(lambda_components)
the_len = len(next(it))
if not all(len(lambda_comp) == the_len for lambda_comp in it):
errmsg = (
"Components elec, vdw, and restraints must have equal amount"
f" of lambda windows. Got {len(lambda_elec)} elec lambda"
f" windows, {len(lambda_vdw)} vdw lambda windows, and"
f"{len(lambda_restraints)} restraints lambda windows."
)
raise ValueError(errmsg)
# Ensure that number of overall lambda windows matches number of lambda
# windows for individual components
if n_replicas != len(lambda_vdw):
errmsg = (
f"Number of replicas {n_replicas} does not equal the"
f" number of lambda windows {len(lambda_vdw)}"
)
raise ValueError(errmsg)
# Check if there are lambda windows with naked charges
for inx, lam in enumerate(lambda_elec):
if lam < 1 and lambda_vdw[inx] == 1:
errmsg = (
"There are states along this lambda schedule "
"where there are atoms with charges but no LJ "
f"interactions: lambda {inx}: "
f"elec {lam} vdW {lambda_vdw[inx]}"
)
raise ValueError(errmsg)
# Check if there are lambda windows with non-zero restraints
if len([r for r in lambda_restraints if r != 0]) > 0:
wmsg = (
"Non-zero restraint lambdas applied. The absolute "
"solvation protocol doesn't apply restraints, "
"therefore restraints won't be applied. "
f"Given lambda_restraints: {lambda_restraints}"
)
logger.warning(wmsg)
warnings.warn(wmsg)
def _validate(
    self,
    *,
    stateA: ChemicalSystem,
    stateB: ChemicalSystem,
    mapping: Optional[Union[gufe.ComponentMapping, list[gufe.ComponentMapping]]] = None,
    extends: Optional[gufe.ProtocolDAGResult] = None,
):
    """
    Validate the end states and the Protocol settings.

    Parameters
    ----------
    stateA : ChemicalSystem
      The chemical system of end state A
    stateB : ChemicalSystem
      The chemical system of end state B
    mapping : Optional[Union[ComponentMapping, list[ComponentMapping]]]
      Not used by this Protocol; a warning is emitted if one is passed.
    extends : Optional[gufe.ProtocolDAGResult]
      Not supported; must be ``None``.

    Raises
    ------
    ValueError
      If ``extends`` is not ``None``, if the vacuum nonbonded method is not
      ``nocutoff``, if a non-zero NVT equilibration is requested for the
      vacuum leg, or if any of the delegated validation helpers fails.
    """
    # Extending is not supported. This should be a NotImplementedError, but
    # the public `validate` method wraps its call to `_validate` in a
    # NotImplementedError guard, so a ValueError is raised instead.
    if extends is not None:
        raise ValueError("Can't extend simulations yet")

    # A mapping is meaningless here; tell the user it is ignored
    if mapping is not None:
        warnings.warn("A mapping was passed but is not used by this Protocol.")

    # End states & alchemical components
    for end_state in (stateA, stateB):
        system_validation.validate_chemical_system(end_state)
    self._validate_endstates(stateA, stateB)

    protocol_settings = self.settings

    # The lambda schedule must be consistent with both legs
    for phase_sim_settings in (
        protocol_settings.solvent_simulation_settings,
        protocol_settings.vacuum_simulation_settings,
    ):
        self._validate_lambda_schedule(
            protocol_settings.lambda_settings,
            phase_sim_settings,
        )

    solvent_ff = protocol_settings.solvent_forcefield_settings
    vacuum_ff = protocol_settings.vacuum_forcefield_settings

    # Nonbonded method vs solvent compatibility, using the more complete
    # system validation solvent checks
    system_validation.validate_solvent(stateA, solvent_ff.nonbonded_method)

    # Gas phase is always gas phase
    vacuum_method = vacuum_ff.nonbonded_method
    if vacuum_method.lower() != "nocutoff":
        raise ValueError(
            "Only the nocutoff nonbonded_method is supported for "
            f"vacuum calculations, {vacuum_method} was "
            "passed"
        )

    # Solvation settings
    settings_validation.validate_openmm_solvation_settings(protocol_settings.solvation_settings)

    # The vacuum leg may not request any NVT equilibration (None or 0 ns)
    nvt_length = protocol_settings.vacuum_equil_simulation_settings.equilibration_length_nvt
    if nvt_length is not None and not np.allclose(nvt_length, 0 * offunit.nanosecond):
        raise ValueError("NVT equilibration cannot be run in vacuum simulation")

    # The timestep must be compatible with the hydrogen mass of each leg
    timestep = protocol_settings.integrator_settings.timestep
    for ff_settings in (vacuum_ff, solvent_ff):
        settings_validation.validate_timestep(ff_settings.hydrogen_mass, timestep)
def _create(
    self,
    stateA: ChemicalSystem,
    stateB: ChemicalSystem,
    mapping: Optional[Union[gufe.ComponentMapping, list[gufe.ComponentMapping]]] = None,
    extends: Optional[gufe.ProtocolDAGResult] = None,
) -> list[gufe.ProtocolUnit]:
    """
    Create the setup, simulation and analysis units of both legs.

    Parameters
    ----------
    stateA : ChemicalSystem
      The chemical system of end state A
    stateB : ChemicalSystem
      The chemical system of end state B
    mapping : Optional[Union[ComponentMapping, list[ComponentMapping]]]
      Forwarded to ``validate`` only.
    extends : Optional[gufe.ProtocolDAGResult]
      Forwarded to ``validate`` only.

    Returns
    -------
    list[gufe.ProtocolUnit]
      All solvent units followed by all vacuum units; within a leg the
      units are ordered setup, simulation, analysis for each repeat.
    """
    # Validate inputs
    self.validate(stateA=stateA, stateB=stateB, mapping=mapping, extends=extends)

    # The alchemical components, and the name of the alchemical species
    alchem_comps = system_validation.get_alchemical_components(
        stateA,
        stateB,
    )
    alchname = alchem_comps["stateA"][0].name

    # (setup, simulation, analysis) unit types per leg, solvent first
    leg_unit_types: dict[str, tuple[type[gufe.ProtocolUnit], ...]] = {
        "solvent": (AHFESolventSetupUnit, AHFESolventSimUnit, AHFESolventAnalysisUnit),
        "vacuum": (AHFEVacuumSetupUnit, AHFEVacuumSimUnit, AHFEVacuumAnalysisUnit),
    }

    n_repeats = self.settings.protocol_repeats
    units: list[gufe.ProtocolUnit] = []
    for phase, (setup_cls, simulation_cls, analysis_cls) in leg_unit_types.items():
        for i in range(n_repeats):
            # the three units of one repeat share a single identifier
            repeat_id = int(uuid.uuid4())

            setup = setup_cls(
                protocol=self,
                stateA=stateA,
                stateB=stateB,
                alchemical_components=alchem_comps,
                generation=0,
                repeat_id=repeat_id,
                name=f"AHFE Setup: {alchname} {phase} leg: repeat {i} generation 0",
            )
            # the simulation only needs state A & the alchemical components
            simulation = simulation_cls(
                protocol=self,
                stateA=stateA,
                alchemical_components=alchem_comps,
                setup_results=setup,
                generation=0,
                repeat_id=repeat_id,
                name=f"AHFE Simulation: {alchname} {phase} leg: repeat {i} generation 0",
            )
            analysis = analysis_cls(
                protocol=self,
                setup_results=setup,
                simulation_results=simulation,
                generation=0,
                repeat_id=repeat_id,
                name=f"AHFE Analysis: {alchname} {phase} leg, repeat {i} generation 0",
            )
            units.extend((setup, simulation, analysis))

    return units
def _gather(
self, protocol_dag_results: Iterable[gufe.ProtocolDAGResult]
) -> dict[str, dict[str, Any]]:
# result units will have a repeat_id and generation
# first group according to repeat_id
unsorted_solvent_repeats = defaultdict(list)
unsorted_vacuum_repeats = defaultdict(list)
for d in protocol_dag_results:
pu: gufe.ProtocolUnitResult
for pu in d.protocol_unit_results:
if ("Analysis" not in pu.name) or (not pu.ok()):
continue
if pu.outputs["simtype"] == "solvent":
unsorted_solvent_repeats[pu.outputs["repeat_id"]].append(pu)
else:
unsorted_vacuum_repeats[pu.outputs["repeat_id"]].append(pu)
repeats: dict[str, dict[str, list[gufe.ProtocolUnitResult]]] = {
"solvent": {},
"vacuum": {},
}
for k, v in unsorted_solvent_repeats.items():
repeats["solvent"][str(k)] = sorted(v, key=lambda x: x.outputs["generation"])
for k, v in unsorted_vacuum_repeats.items():
repeats["vacuum"][str(k)] = sorted(v, key=lambda x: x.outputs["generation"])
return repeats
================================================
FILE: src/openfe/protocols/openmm_md/__init__.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""
Run MD simulation using OpenMM and OpenMMTools.
"""
from .plain_md_methods import (
PlainMDProtocol,
PlainMDProtocolResult,
PlainMDProtocolSettings,
PlainMDSetupUnit,
PlainMDSimulationUnit,
)
# Public names of the ``openfe.protocols.openmm_md`` package; all of them are
# imported from ``.plain_md_methods`` above.
__all__ = [
    "PlainMDProtocol",
    "PlainMDProtocolSettings",
    "PlainMDProtocolResult",
    "PlainMDSetupUnit",
    "PlainMDSimulationUnit",
]
================================================
FILE: src/openfe/protocols/openmm_md/plain_md_methods.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""OpenMM MD Protocol --- :mod:`openfe.protocols.openmm_md.plain_md_methods`
===========================================================================================
This module implements the necessary methodology tools to run an MD
simulation using OpenMM tools.
"""
from __future__ import annotations
import logging
import pathlib
import time
import uuid
import warnings
from collections import defaultdict
from typing import Any, Iterable, Optional
import gufe
import mdtraj
import numpy as np
import openmm
import openmm.unit as omm_unit
from gufe import (
BaseSolventComponent,
ChemicalSystem,
SmallMoleculeComponent,
SolvatedPDBComponent,
settings,
)
from gufe.protocols.errors import ProtocolUnitExecutionError
from gufe.settings.typing import KelvinQuantity
from mdtraj.reporters import XTCReporter
from openff.toolkit.topology import Molecule as OFFMolecule
from openff.units import Quantity, unit
from openff.units.openmm import from_openmm, to_openmm
from openmm import MonteCarloBarostat, MonteCarloMembraneBarostat
import openfe
from openfe.protocols.openmm_md.plain_md_settings import (
IntegratorSettings,
MDOutputSettings,
MDSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSolvationSettings,
PlainMDProtocolSettings,
)
from openfe.protocols.openmm_utils import (
charge_generation,
omm_compute,
serialization,
settings_validation,
system_creation,
system_validation,
)
from openfe.protocols.openmm_utils.omm_settings import (
BasePartialChargeSettings,
FemtosecondQuantity,
)
from openfe.utils import log_system_probe, without_oechem_backend
logger = logging.getLogger(__name__)
class PlainMDProtocolResult(gufe.ProtocolResult):
    """
    Dict-like container for the output of a PlainMDProtocol.

    Gives access to the simulation outputs, namely the PDB file of the
    pre-minimized system and the production trajectory files.
    """

    def __init__(self, **data):
        super().__init__(**data)
        # ``self.data`` maps str(repeat_id) to a list of unit results;
        # repeats holding more than two entries cannot be handled yet.
        for unit_results in self.data.values():
            if len(unit_results) > 2:
                raise NotImplementedError("Can't stitch together results yet")

    def _first_unit_outputs(self, key: str) -> list:
        """Return ``outputs[key]`` of the first unit result of every repeat."""
        return [unit_results[0].outputs[key] for unit_results in self.data.values()]

    def get_estimate(self):
        """A plain MD simulation has no estimate.

        Returns
        -------
        None
        """
        return None

    def get_uncertainty(self):
        """A plain MD simulation has no uncertainty.

        Returns
        -------
        None
        """
        return None

    def get_traj_filename(self) -> list[pathlib.Path]:
        """
        Get the trajectory paths, one per repeat.

        Returns
        -------
        traj : list[pathlib.Path]
          list of paths (pathlib.Path) to the simulation trajectory
        """
        return self._first_unit_outputs("nc")

    def get_pdb_filename(self) -> list[pathlib.Path]:
        """
        Get the paths to the pdb files of the pre-minimized system, one per
        repeat.

        Returns
        -------
        pdbs : list[pathlib.Path]
          list of paths (pathlib.Path) to the pdb files
        """
        return self._first_unit_outputs("system_pdb")
class PlainMDProtocol(gufe.Protocol):
    """
    Protocol for running Molecular Dynamics simulations using OpenMM.

    See Also
    --------
    :mod:`openfe.protocols`
    :class:`openfe.protocols.openmm_md.PlainMDProtocolSettings`
    :class:`openfe.protocols.openmm_md.PlainMDSetupUnit`
    :class:`openfe.protocols.openmm_md.PlainMDSimulationUnit`
    :class:`openfe.protocols.openmm_md.PlainMDProtocolResult`
    """

    # Container type used for the gathered results of this Protocol
    result_cls = PlainMDProtocolResult
    # Settings type accepted by this Protocol
    _settings_cls = PlainMDProtocolSettings
    _settings: PlainMDProtocolSettings
@classmethod
def _default_settings(cls):
    """Build the default settings for this Protocol.

    The returned settings are intended as a reasonable starting point for
    creating an instance of this Protocol. It is nevertheless recommended
    to inspect and customize them before running any calculation.

    Returns
    -------
    Settings
      a set of default settings
    """
    thermodynamics = settings.ThermoSettings(
        temperature=298.15 * unit.kelvin,
        pressure=1 * unit.bar,
    )
    # NVT equilibration, then NPT equilibration, then production
    simulation_lengths = MDSimulationSettings(
        equilibration_length_nvt=0.1 * unit.nanosecond,
        equilibration_length=1.0 * unit.nanosecond,
        production_length=5.0 * unit.nanosecond,
    )
    outputs = MDOutputSettings(checkpoint_storage_filename="checkpoint.xml")

    return PlainMDProtocolSettings(
        forcefield_settings=settings.OpenMMSystemGeneratorFFSettings(),
        thermo_settings=thermodynamics,
        partial_charge_settings=OpenFFPartialChargeSettings(),
        solvation_settings=OpenMMSolvationSettings(),
        engine_settings=OpenMMEngineSettings(),
        integrator_settings=IntegratorSettings(),
        simulation_settings=simulation_lengths,
        output_settings=outputs,
        protocol_repeats=1,
    )
def _validate(
self,
stateA: ChemicalSystem,
stateB: ChemicalSystem,
mapping: Optional[dict[str, gufe.ComponentMapping]] = None,
extends: Optional[gufe.ProtocolDAGResult] = None,
):
# Check we're not extending
if extends is not None:
# This technically should be NotImplementedError
# but gufe.Protocol.validate calls `_validate` wrapped around an
# except for NotImplementedError, so we can't raise it here
raise ValueError("Can't extend simulations yet")
# Check we're not using a mapping, since we're not doing anything with it
if mapping is not None:
wmsg = "A mapping was passed but is not used by this Protocol."
warnings.warn(wmsg)
# check that stateA and stateB are the same
if stateA is not stateB:
errmsg = "The two end states do not match."
raise ValueError(errmsg)
# Validate the ChemicalSystem
system_validation.validate_chemical_system(stateA)
# Validate solvent component if present
nonbond = self.settings.forcefield_settings.nonbonded_method
system_validation.validate_solvent(stateA, nonbond)
# Validate the BaseSolventComponents
base_solvent = stateA.get_components_of_type(BaseSolventComponent)
if len(base_solvent) > 1:
errmsg = "Multiple BaseSolventComponents found, only one is supported."
raise ValueError(errmsg)
# Validate protein component if present
system_validation.validate_protein(stateA)
# Validate the barostat used in combination with the protein component
system_validation.validate_barostat(stateA, self.settings.integrator_settings.barostat)
# Validate solvation settings
settings_validation.validate_openmm_solvation_settings(self.settings.solvation_settings)
# is the timestep good for the mass?
settings_validation.validate_timestep(
self.settings.forcefield_settings.hydrogen_mass,
self.settings.integrator_settings.timestep,
)
def _create(
    self,
    stateA: ChemicalSystem,
    stateB: ChemicalSystem,
    mapping: Optional[dict[str, gufe.ComponentMapping]] = None,
    extends: Optional[gufe.ProtocolDAGResult] = None,
) -> list[gufe.ProtocolUnit]:
    """
    Create one setup and one simulation unit per protocol repeat.

    Parameters
    ----------
    stateA : ChemicalSystem
      The chemical system to simulate.
    stateB : ChemicalSystem
      Forwarded to ``validate`` only.
    mapping : Optional[dict[str, gufe.ComponentMapping]]
      Forwarded to ``validate`` only.
    extends : Optional[gufe.ProtocolDAGResult]
      Forwarded to ``validate`` only.

    Returns
    -------
    list[gufe.ProtocolUnit]
      For each repeat, the setup unit followed by its simulation unit.
    """
    # validate the inputs
    self.validate(stateA=stateA, stateB=stateB, mapping=mapping, extends=extends)

    # TODO: Deal with multiple ProteinComponents
    _, protein_comp, small_mols = system_validation.get_components(stateA)

    # Human readable label: leg type followed by "<type>:<name>" of the
    # protein (if any) and of every small molecule
    if stateA.contains(BaseSolventComponent):
        system_name = "Solvent MD"
    else:
        system_name = "Vacuum MD"
    named_components = [c for c in [protein_comp] + small_mols if c is not None]
    for comp in named_components:
        comp_type = comp.__class__.__name__
        comp_name = "NoName" if len(comp.name) == 0 else comp.name
        system_name += f" {comp_type}:{comp_name}"

    # make the DAG from the setup and simulation units
    units = []
    for i in range(self.settings.protocol_repeats):
        # both units of a repeat share one identifier
        repeat_id = int(uuid.uuid4())
        shared_inputs = dict(
            protocol=self,
            stateA=stateA,
            generation=0,
            repeat_id=repeat_id,
        )
        setup = PlainMDSetupUnit(
            name=f"MD Setup: {system_name} repeat {i} generation 0",
            **shared_inputs,
        )
        sim = PlainMDSimulationUnit(
            setup_results=setup,
            name=f"MD Simulation: {system_name} repeat {i} generation 0",
            **shared_inputs,
        )
        units += [setup, sim]

    return units
def _gather(self, protocol_dag_results: Iterable[gufe.ProtocolDAGResult]) -> dict[str, Any]:
# result units will have a repeat_id and generations within this
# repeat_id
# first group according to repeat_id
unsorted_repeats = defaultdict(list)
for d in protocol_dag_results:
pu: gufe.ProtocolUnitResult
for pu in d.protocol_unit_results:
# Only keep the simulation units which are ok
if ("Simulation" not in pu.name) or (not pu.ok()):
continue
unsorted_repeats[pu.outputs["repeat_id"]].append(pu)
# then sort by generation within each repeat_id list
repeats: dict[str, list[gufe.ProtocolUnitResult]] = {}
for k, v in unsorted_repeats.items():
repeats[str(k)] = sorted(v, key=lambda x: x.outputs["generation"])
# returns a dict of repeat_id: sorted list of ProtocolUnitResult
return repeats
class PlainMDUnitMixin:
    """Helpers shared by the plain MD protocol units."""

    def _prepare(
        self,
        verbose: bool,
        scratch_basepath: pathlib.Path | None,
        shared_basepath: pathlib.Path | None,
    ):
        """
        Store the verbosity flag and the scratch / shared base paths.

        Parameters
        ----------
        verbose : bool
          Verbose output of the simulation progress. Output is provided via
          INFO level logging.
        scratch_basepath : pathlib.Path | None
          Optional base path to write scratch files to. ``None`` falls back
          to ``pathlib.Path(".")``.
        shared_basepath : pathlib.Path | None
          Optional base path to write shared files to. ``None`` falls back
          to ``pathlib.Path(".")``.
        """
        self.verbose = verbose

        # a missing base path means "current directory"
        self.scratch_basepath = (
            pathlib.Path(".") if scratch_basepath is None else scratch_basepath
        )
        self.shared_basepath = (
            pathlib.Path(".") if shared_basepath is None else shared_basepath
        )
class PlainMDSetupUnit(PlainMDUnitMixin, gufe.ProtocolUnit):
    """
    Protocol setup unit for plain MD simulations which handles charging, system building and solvation.

    The ``run`` method writes the pre-minimized structure, the serialized
    system and the input positions to the shared directory.
    """
@staticmethod
def _assign_partial_charges(
    charge_settings: OpenFFPartialChargeSettings,
    smc_components: dict[SmallMoleculeComponent, OFFMolecule],
) -> None:
    """
    Assign partial charges to the OpenFF Molecules of all SMCs.

    Parameters
    ----------
    charge_settings : OpenFFPartialChargeSettings
      Settings for controlling how the partial charges are assigned.
    smc_components : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
      Dictionary of OpenFF Molecules to charge, keyed by
      SmallMoleculeComponent.
    """
    # identical options for every molecule; ``overwrite=False`` is always
    # passed to the charge assignment helper
    charge_options = dict(
        overwrite=False,
        method=charge_settings.partial_charge_method,
        toolkit_backend=charge_settings.off_toolkit_backend,
        generate_n_conformers=charge_settings.number_of_conformers,
        nagl_model=charge_settings.nagl_model,
    )
    for offmol in smc_components.values():
        charge_generation.assign_offmol_partial_charges(offmol=offmol, **charge_options)
def run(
    self,
    *,
    dry: bool = False,
    verbose: bool = True,
    scratch_basepath: pathlib.Path | None = None,
    shared_basepath: pathlib.Path | None = None,
) -> dict[str, Any]:
    """Set up a plain MD system.

    Assigns partial charges to the small molecules, builds the OpenMM
    system and writes the pre-minimized structure, the serialized system
    and the input positions to the shared directory.

    Parameters
    ----------
    dry : bool
      If True, the returned dictionary additionally contains the in-memory
      system, positions and topology under the ``"debug"`` key. The output
      files are written regardless of this flag.
    verbose : bool
      Verbose output of the simulation progress. Output is provided via
      INFO level logging.
    scratch_basepath : pathlib.Path | None
      Where to store temporary files, defaults to current working directory
    shared_basepath : pathlib.Path | None
      Where the output files are written, defaults to current working
      directory

    Returns
    -------
    dict
      The paths of the serialized system (``"system"``), the input
      positions (``"positions"``) and the pre-minimized structure
      (``"system_pdb"``), the number of steps of every simulation stage
      (``"equil_steps_nvt"``, ``"equil_steps_npt"``, ``"prod_steps"``),
      and the ``"debug"`` objects if ``dry==True``.

    Raises
    ------
    Exception
      Nothing is caught here; any exception raised by the underlying setup
      calls propagates to the caller.
    """
    # Prepare paths and set verbosity
    self._prepare(verbose, scratch_basepath, shared_basepath)

    if verbose:
        self.logger.info("Creating system")

    # 0. General setup and settings dependency resolution step

    # Extract relevant settings
    protocol_settings: PlainMDProtocolSettings = self._inputs["protocol"].settings
    stateA = self._inputs["stateA"]

    forcefield_settings: settings.OpenMMSystemGeneratorFFSettings = (
        protocol_settings.forcefield_settings
    )
    thermo_settings: settings.ThermoSettings = protocol_settings.thermo_settings
    solvation_settings: OpenMMSolvationSettings = protocol_settings.solvation_settings
    charge_settings: BasePartialChargeSettings = protocol_settings.partial_charge_settings
    sim_settings: MDSimulationSettings = protocol_settings.simulation_settings
    output_settings: MDOutputSettings = protocol_settings.output_settings
    integrator_settings: IntegratorSettings = protocol_settings.integrator_settings
    timestep = integrator_settings.timestep

    # is the timestep good for the mass?
    settings_validation.validate_timestep(forcefield_settings.hydrogen_mass, timestep)

    # do step validation early and pass through the units
    # (the NVT stage is optional: no length means no NVT steps)
    if sim_settings.equilibration_length_nvt is not None:
        equil_steps_nvt = settings_validation.get_simsteps(
            sim_length=sim_settings.equilibration_length_nvt,
            timestep=timestep,
            mc_steps=1,
        )
    else:
        equil_steps_nvt = None

    equil_steps_npt = settings_validation.get_simsteps(
        sim_length=sim_settings.equilibration_length,
        timestep=timestep,
        mc_steps=1,
    )
    prod_steps = settings_validation.get_simsteps(
        sim_length=sim_settings.production_length,
        timestep=timestep,
        mc_steps=1,
    )

    solvent_comp, protein_comp, small_mols = system_validation.get_components(stateA)

    # A SolvatedPDBComponent protein also takes the role of the solvent
    # component from here on
    if isinstance(protein_comp, SolvatedPDBComponent):
        solvent_comp = protein_comp

    # 1. Create stateA system
    # Create a dictionary of OFFMol for each SMC for bookkeeping
    smc_components: dict[SmallMoleculeComponent, OFFMolecule] = {
        i: i.to_openff() for i in small_mols
    }

    # a. assign partial charges to smcs
    self._assign_partial_charges(charge_settings, smc_components)

    # b. get a system generator
    if output_settings.forcefield_cache is not None:
        ffcache = self.shared_basepath / output_settings.forcefield_cache
    else:
        ffcache = None

    # Note: we block out the oechem backend for all systemgenerator
    # linked operations to avoid any smiles operations that can
    # go wrong when doing rdkit->OEchem roundtripping
    with without_oechem_backend():
        system_generator = system_creation.get_system_generator(
            forcefield_settings=forcefield_settings,
            integrator_settings=integrator_settings,
            thermo_settings=thermo_settings,
            cache=ffcache,
            has_solvent=solvent_comp is not None,
        )

        # Force creation of smc templates so we can solvate later
        for mol in smc_components.values():
            system_generator.create_system(mol.to_topology().to_openmm(), molecules=[mol])

        # c. get OpenMM Modeller + a resids dictionary for each component
        stateA_modeller, comp_resids = system_creation.get_omm_modeller(
            protein_comp=protein_comp,
            solvent_comp=solvent_comp,
            small_mols=smc_components,
            omm_forcefield=system_generator.forcefield,
            solvent_settings=solvation_settings,
        )

        # d. get topology & positions
        # Note: roundtrip positions to remove vec3 issues
        stateA_topology = stateA_modeller.getTopology()
        stateA_positions = to_openmm(from_openmm(stateA_modeller.getPositions()))

        # e. create the stateA System
        # NOTE(review): the molecules passed here are freshly converted
        # from the SMCs, not the charge-assigned ones held in
        # ``smc_components`` -- presumably the templates created above are
        # reused by the system generator; confirm.
        stateA_system = system_generator.create_system(
            stateA_topology,
            molecules=[s.to_openff() for s in small_mols],
        )

    # f. Save pdb of entire system topology to file, this is always needed for restarts
    with open(self.shared_basepath / output_settings.preminimized_structure, "w") as f:
        openmm.app.PDBFile.writeFile(stateA_topology, stateA_positions, file=f, keepIds=True)

    # g. Save the system and positions to file
    system_outfile = self.shared_basepath / "system.xml.bz2"
    serialization.serialize(stateA_system, system_outfile)

    # positions are stored as a plain array of values in nanometers
    positions_outfile = self.shared_basepath / "input_positions.npy"
    np.save(positions_outfile, stateA_positions.value_in_unit(omm_unit.nanometers))

    unit_results_dict = {
        "system": system_outfile,
        # save the positions to higher precision
        "positions": positions_outfile,
        "system_pdb": self.shared_basepath / output_settings.preminimized_structure,
        "equil_steps_nvt": equil_steps_nvt,
        "equil_steps_npt": equil_steps_npt,
        "prod_steps": prod_steps,
    }

    if dry:
        # add non serialised stuff for testing
        debug_info = {
            "system": stateA_system,
            "positions": stateA_positions,
            "topology": stateA_topology,
        }
        unit_results_dict["debug"] = debug_info

    return unit_results_dict
def _execute(
    self,
    ctx: gufe.Context,
    **kwargs,
) -> dict[str, Any]:
    """
    Run the setup and return its outputs together with provenance data.

    Parameters
    ----------
    ctx : gufe.Context
      Execution context providing the ``scratch`` and ``shared`` paths.
    **kwargs
      Accepted but not used.

    Returns
    -------
    dict[str, Any]
      The repeat id, the generation and the versions of openmm, openfe
      and gufe, merged with everything returned by ``run``. Entries
      returned by ``run`` take precedence on key clashes.
    """
    log_system_probe(logging.INFO, paths=[ctx.scratch])

    run_outputs = self.run(scratch_basepath=ctx.scratch, shared_basepath=ctx.shared)

    results: dict[str, Any] = {
        "repeat_id": self._inputs["repeat_id"],
        "generation": self._inputs["generation"],
    }
    # track some version restart info to check compatibility
    results["openmm_version"] = openmm.__version__
    results["openfe_version"] = openfe.__version__
    results["gufe_version"] = gufe.__version__

    results.update(run_outputs)
    return results
class PlainMDSimulationUnit(PlainMDUnitMixin, gufe.ProtocolUnit):
"""
Protocol unit for plain MD simulation equilibration and production runs (NonTransformation).
"""
@staticmethod
def _check_restart(output_settings: MDOutputSettings, shared_path: pathlib.Path):
"""
Check if we are doing a restart.
Parameters
----------
output_settings : MDOutputSettings
The simulation output settings
shared_path : pathlib.Path
The shared directory where we should be looking for existing files.
Notes
-----
For now this just checks if the checkpoint state file is present in the
shared directory but in the future this may expand depending on
how warehouse works.
"""
checkpoint = shared_path / output_settings.checkpoint_storage_filename
return checkpoint.is_file()
@staticmethod
def _verify_execution_environment(
    setup_outputs: dict[str, Any],
) -> None:
    """
    Check that the Python environment matches the one recorded at setup.

    The currently imported gufe, openfe and openmm versions are compared,
    in that order, with the versions stored in the setup outputs.

    Parameters
    ----------
    setup_outputs : dict[str, Any]
      Outputs of the setup unit; expected to hold the keys
      ``gufe_version``, ``openfe_version`` and ``openmm_version``.

    Raises
    ------
    ProtocolUnitExecutionError
      If one of the versions differs, or if a version entry needed for the
      comparison is missing from ``setup_outputs``.
    """
    current_versions = (
        ("gufe_version", gufe.__version__),
        ("openfe_version", openfe.__version__),
        ("openmm_version", openmm.__version__),
    )

    try:
        # stops at the first mismatch, so later keys are only looked up
        # when all earlier versions agree
        environment_changed = any(
            current != setup_outputs[key] for key, current in current_versions
        )
    except KeyError:
        errmsg = "Missing environment information from setup outputs."
        raise ProtocolUnitExecutionError(errmsg)

    if environment_changed:
        errmsg = "Python environment has changed, cannot continue Protocol execution."
        raise ProtocolUnitExecutionError(errmsg)
@staticmethod
def _save_pdb_subset(
    simulation: openmm.app.Simulation,
    output_settings: MDOutputSettings,
    file_name: pathlib.Path,
):
    """
    Write the current positions of a subset of atoms to a PDB file.

    Parameters
    ----------
    simulation : openmm.app.Simulation
      The simulation to take the current positions and topology from.
    output_settings : MDOutputSettings
      Settings providing the ``output_indices`` selection string.
    file_name : pathlib.Path
      Path of the PDB file to write.
    """
    # current positions, without wrapping into the periodic box
    state = simulation.context.getState(getPositions=True, enforcePeriodicBox=False)
    positions = to_openmm(from_openmm(state.getPositions()))

    # atoms picked by the selection given in the output settings
    topology = mdtraj.Topology.from_openmm(simulation.topology)
    atom_indices = topology.select(output_settings.output_indices)

    subset = mdtraj.Trajectory(
        positions[atom_indices, :],
        topology.subset(atom_indices),
    )
    subset.save_pdb(file_name)
@staticmethod
def _run_dynamics(
    simulation: openmm.app.Simulation,
    steps: int,
    temperature: KelvinQuantity,
    barostat_frequency: Quantity,
    output_settings: MDOutputSettings,
    verbose: bool = True,
    output_path: None | pathlib.Path = None,
    reinitialize_velocities: bool = True,
):
    """
    Run one stretch of dynamics and optionally save the final structure.

    Parameters
    ----------
    simulation : openmm.app.Simulation
        The simulation to advance.
    steps : int
        Number of integration steps to run.
    temperature : KelvinQuantity
        Temperature used when velocities are (re)assigned.
    barostat_frequency : Quantity
        Frequency applied to every Monte Carlo (membrane) barostat force of
        the context's system; its magnitude (``.m``) is what is set.
    output_settings : MDOutputSettings
        Output settings, used to select the atoms of the saved structure.
    verbose : bool
        If True, log the wall time of the run at INFO level.
    output_path : pathlib.Path, optional
        If given, a PDB of the final frame is written here.
    reinitialize_velocities : bool
        If True, draw new velocities at ``temperature`` before stepping.
        Pass False to keep the velocities already in the context, e.g. when
        continuing a phase from a checkpoint.
    """
    if reinitialize_velocities:
        simulation.context.setVelocitiesToTemperature(to_openmm(temperature))

    # Apply the requested frequency to whichever barostat forces are present.
    barostat_types = (MonteCarloBarostat, MonteCarloMembraneBarostat)
    for force in simulation.context.getSystem().getForces():
        if isinstance(force, barostat_types):
            force.setFrequency(barostat_frequency.m)

    # Time the integration itself.
    started = time.time()
    simulation.step(steps)
    finished = time.time()
    if verbose:
        logger.info(f"Completed dynamics in {finished - started} seconds")

    # Nothing more to do unless a final-frame structure was requested.
    if output_path is None:
        return
    PlainMDSimulationUnit._save_pdb_subset(
        simulation,
        output_settings,
        output_path,
    )
@staticmethod
def _get_remaining_steps(
current_step_count: int,
equil_steps_nvt: int,
equil_steps_npt: int,
prod_steps: int,
) -> tuple[int, int, int, bool]:
"""
Work out the remaining steps for each phase of the simulation based on the current step count,
and determine if production has already started.
Returns
-------
equil_steps_nvt : int
The number of nvt steps left to run
equil_steps_npt : int
The number of npt steps left to run
prod_steps : int
The number of production steps left to run
production_started : bool
Whether the production phase has already started or not
"""
nvt_end = equil_steps_nvt
npt_end = equil_steps_nvt + equil_steps_npt
prod_end = equil_steps_nvt + equil_steps_npt + prod_steps
if npt_end < current_step_count <= prod_end:
# In the production phase
return 0, 0, prod_end - current_step_count, True
elif nvt_end < current_step_count <= npt_end:
# In the NPT equilibration phase
return 0, npt_end - current_step_count, prod_steps, False
else:
# In the NVT equilibration phase
return nvt_end - current_step_count, equil_steps_npt, prod_steps, False
@staticmethod
def _run_MD(
    simulation: openmm.app.Simulation,
    positions: omm_unit.Quantity,
    simulation_settings: MDSimulationSettings,
    output_settings: MDOutputSettings,
    temperature: KelvinQuantity,
    barostat_frequency: Quantity,
    timestep: FemtosecondQuantity,
    equil_steps_nvt: int | None,
    equil_steps_npt: int,
    prod_steps: int,
    verbose: bool = True,
    shared_basepath: pathlib.Path | None = None,
    restart: bool = False,
) -> None:
    """
    Minimize, equilibrate (NVT then NPT) and run production MD.

    On a fresh run the positions are set and the system is minimized first.
    On a restart the checkpoint state is loaded instead, and the number of
    steps left in each phase is derived from the loaded step count.

    Parameters
    ----------
    simulation : openmm.app.Simulation
        An OpenMM simulation to simulate.
    positions : openmm.unit.Quantity
        Initial positions for the system (unused when restarting).
    simulation_settings : MDSimulationSettings
        Settings for MD simulation
    output_settings : MDOutputSettings
        Settings for output of MD simulation
    temperature : KelvinQuantity
        temperature setting
    barostat_frequency : openff.units.Quantity
        Frequency for the barostat
    timestep : FemtosecondQuantity
        Simulation integration timestep
    equil_steps_nvt : Optional[int]
        number of steps for NVT equilibration
        if None, no NVT equilibration will be performed
    equil_steps_npt : int
        number of steps for NPT equilibration
    prod_steps : int
        number of steps for the production run
    verbose : bool
        Verbose output of the simulation progress. Output is provided via
        INFO level logging.
    shared_basepath : Pathlike, optional
        Where to run the calculation, defaults to current working directory
    restart : bool, optional, default=False
        Whether we are restarting from a previous simulation or not, the checkpoint file should be
        present in the shared directories.
    """
    workdir = pathlib.Path(".") if shared_basepath is None else shared_basepath

    # Interval, in steps, used for checkpoint writes (and for the state log)
    checkpoint_interval = settings_validation.get_simsteps(
        sim_length=output_settings.checkpoint_interval,
        timestep=timestep,
        mc_steps=1,
    )

    # A missing NVT length means "skip NVT"
    equil_steps_nvt = equil_steps_nvt or 0

    # Only a restart can find production already under way
    production_started = False

    # Fresh run: the first phase gets new velocities.
    # Restart: the phase being resumed keeps the checkpoint velocities.
    reinitialize_velocities = not restart

    if not restart:
        # Fresh run: place the atoms and relax the system before any dynamics
        simulation.context.setPositions(positions)

        if verbose:
            logger.info("Minimizing systems")
        simulation.minimizeEnergy(maxIterations=simulation_settings.minimization_steps)

        if output_settings.minimized_structure:
            PlainMDSimulationUnit._save_pdb_subset(
                simulation,
                output_settings,
                workdir / output_settings.minimized_structure,
            )
    else:
        # Restart: setup and minimization were done before the first checkpoint was written
        if verbose:
            logger.info("Restarting simulation from checkpoint state")
        simulation.loadState(str(workdir / output_settings.checkpoint_storage_filename))

        # Reduce each phase to what is still outstanding at the loaded step count
        (
            equil_steps_nvt,
            equil_steps_npt,
            prod_steps,
            production_started,
        ) = PlainMDSimulationUnit._get_remaining_steps(
            current_step_count=simulation.context.getStepCount(),
            equil_steps_nvt=equil_steps_nvt,
            equil_steps_npt=equil_steps_npt,
            prod_steps=prod_steps,
        )

    # Checkpoint reporter, so equilibration / production can be recovered
    if output_settings.checkpoint_storage_filename:
        checkpoint_reporter = openmm.app.CheckpointReporter(
            file=str(workdir / output_settings.checkpoint_storage_filename),
            reportInterval=checkpoint_interval,
            writeState=True,  # writes portable XML via simulation.saveState()
        )
        simulation.reporters.append(checkpoint_reporter)

    # ---- NVT equilibration ----
    if equil_steps_nvt > 0:
        if verbose:
            logger.info(f"Running NVT equilibration for {equil_steps_nvt} steps")

        nvt_structure = output_settings.equil_nvt_structure
        PlainMDSimulationUnit._run_dynamics(
            simulation=simulation,
            steps=equil_steps_nvt,
            temperature=temperature,
            barostat_frequency=0 * unit.timestep,  # turn off the barostat for this stage
            output_settings=output_settings,
            verbose=verbose,
            output_path=None if nvt_structure is None else workdir / nvt_structure,
            reinitialize_velocities=reinitialize_velocities,
        )
        # Having run NVT, the following stage starts from fresh velocities
        reinitialize_velocities = True

    # ---- NPT equilibration ----
    if equil_steps_npt > 0:
        if verbose:
            logger.info(f"Running NPT equilibration for {equil_steps_npt} steps")

        npt_structure = output_settings.equil_npt_structure
        PlainMDSimulationUnit._run_dynamics(
            simulation=simulation,
            steps=equil_steps_npt,
            temperature=temperature,
            barostat_frequency=barostat_frequency,
            output_settings=output_settings,
            verbose=verbose,
            output_path=None if npt_structure is None else workdir / npt_structure,
            reinitialize_velocities=reinitialize_velocities,
        )
        # Production carries on with the velocities from NPT
        reinitialize_velocities = False

    # ---- Production ----
    if verbose:
        logger.info(f"Running production phase for {prod_steps} steps")

    # Trajectory write interval expressed in steps
    write_interval = settings_validation.divmod_time_and_check(
        output_settings.trajectory_write_interval,
        timestep,
        "trajectory_write_interval",
        "timestep",
    )

    if output_settings.production_trajectory_filename:
        # Atoms whose coordinates are stored in the trajectory
        trajectory_atoms = mdtraj.Topology.from_openmm(simulation.topology).select(
            output_settings.output_indices
        )
        simulation.reporters.append(
            XTCReporter(
                file=str(workdir / output_settings.production_trajectory_filename),
                reportInterval=write_interval,
                atomSubset=trajectory_atoms,
                # extend the existing trajectory when resuming a production run
                append=production_started,
            )
        )

    if output_settings.log_output:
        state_reporter = openmm.app.StateDataReporter(
            str(workdir / output_settings.log_output),
            checkpoint_interval,
            step=True,
            time=True,
            potentialEnergy=True,
            kineticEnergy=True,
            totalEnergy=True,
            temperature=True,
            volume=True,
            density=True,
            speed=True,
            append=production_started,
        )
        simulation.reporters.append(state_reporter)

    PlainMDSimulationUnit._run_dynamics(
        simulation=simulation,
        steps=prod_steps,
        temperature=temperature,
        barostat_frequency=barostat_frequency,
        output_settings=output_settings,
        verbose=verbose,
        output_path=None,  # the trajectory is saved for the production run so don't save again
        reinitialize_velocities=reinitialize_velocities,
    )
def run(
    self,
    *,
    system: openmm.System,
    positions: openmm.unit.Quantity,
    topology: openmm.app.Topology,
    equil_steps_nvt: int | None,
    equil_steps_npt: int,
    prod_steps: int,
    dry: bool = False,
    verbose: bool = True,
    scratch_basepath: pathlib.Path | None = None,
    shared_basepath: pathlib.Path | None = None,
) -> dict[str, Any]:
    """Run the MD simulation.

    Parameters
    ----------
    system : openmm.System
        The System to simulate.
    positions : openmm.unit.Quantity
        The positions of the System.
    topology : openmm.app.Topology
        The topology of the System.
    equil_steps_nvt : int or None
        The number of nvt equilibration steps, ``None`` to skip NVT.
    equil_steps_npt : int
        The number of npt equilibration steps.
    prod_steps : int
        The number of production steps.
    dry : bool
        Do a dry run of the calculation: the integrator and the simulation
        object are built but no minimization or dynamics is run.
    verbose : bool
        Verbose output of the simulation progress. Output is provided via
        INFO level logging.
    scratch_basepath : Pathlike, optional
        Where to store temporary files, defaults to current working directory
    shared_basepath : Pathlike, optional
        Where to run the calculation, defaults to current working directory

    Returns
    -------
    dict
        For a real run, the paths of the output files under the shared
        directory, keyed ``"system_pdb"``, ``"minimized_pdb"``, ``"nc"``,
        ``"last_checkpoint"``, ``"nvt_equil_pdb"`` and ``"npt_equil_pdb"``.
        An entry is ``None`` when the corresponding output file name is not
        set; ``"last_checkpoint"`` is also ``None`` when no checkpoint file
        was written, and ``"nvt_equil_pdb"`` when no NVT length is
        configured. If ``dry==True``, ``{"debug": {"system": system}}`` is
        returned instead.

    Raises
    ------
    Exception
        Any error raised while running the simulation is propagated after
        the integrator and simulation objects have been released.
    """
    # Prepare paths and set verbosity
    self._prepare(verbose, scratch_basepath, shared_basepath)

    # Extract relevant settings
    protocol_settings: PlainMDProtocolSettings = self._inputs["protocol"].settings
    forcefield_settings: settings.OpenMMSystemGeneratorFFSettings = (
        protocol_settings.forcefield_settings
    )
    thermo_settings: settings.ThermoSettings = protocol_settings.thermo_settings
    sim_settings: MDSimulationSettings = protocol_settings.simulation_settings
    output_settings: MDOutputSettings = protocol_settings.output_settings
    timestep = protocol_settings.integrator_settings.timestep
    integrator_settings = protocol_settings.integrator_settings

    # Get platform
    restrict_cpu = forcefield_settings.nonbonded_method.lower() == "nocutoff"
    platform = omm_compute.get_openmm_platform(
        platform_name=protocol_settings.engine_settings.compute_platform,
        gpu_device_index=protocol_settings.engine_settings.gpu_device_index,
        restrict_cpu_count=restrict_cpu,
    )

    # Set the integrator
    integrator = openmm.LangevinMiddleIntegrator(
        to_openmm(thermo_settings.temperature),
        to_openmm(integrator_settings.langevin_collision_rate),
        to_openmm(timestep),
    )

    # Build the simulation
    simulation = openmm.app.Simulation(
        topology,
        system,
        integrator,
        platform,
    )

    try:
        if not dry:  # pragma: no-cover
            # check for a restart
            restart = self._check_restart(output_settings, self.shared_basepath)

            # start the simulation
            self._run_MD(
                simulation,
                positions,
                sim_settings,
                output_settings,
                thermo_settings.temperature,
                integrator_settings.barostat_frequency,
                timestep,
                equil_steps_nvt,
                equil_steps_npt,
                prod_steps,
                shared_basepath=self.shared_basepath,
                restart=restart,
                verbose=self.verbose,
            )
    finally:
        # Release the simulation objects even when the run failed
        if not dry:
            del integrator, simulation

    if dry:
        return {"debug": {"system": system}}

    # pragma: no-cover
    def _shared_file(filename):
        """Path of ``filename`` in the shared directory, or None if unset."""
        # The output file names are optional; ``Path / None`` would raise.
        return self.shared_basepath / filename if filename else None

    output = {
        "system_pdb": _shared_file(output_settings.preminimized_structure),
        "minimized_pdb": _shared_file(output_settings.minimized_structure),
        "nc": _shared_file(output_settings.production_trajectory_filename),
        "last_checkpoint": _shared_file(output_settings.checkpoint_storage_filename),
    }

    # The checkpoint file can not exist if frequency > sim length
    if output["last_checkpoint"] is not None and not output["last_checkpoint"].exists():
        output["last_checkpoint"] = None

    # The NVT PDB can be omitted if we don't run the simulation
    # Note: we could also just check the file exist
    if sim_settings.equilibration_length_nvt is not None:
        output["nvt_equil_pdb"] = _shared_file(output_settings.equil_nvt_structure)
    else:
        output["nvt_equil_pdb"] = None

    output["npt_equil_pdb"] = _shared_file(output_settings.equil_npt_structure)

    return output
def _execute(
    self,
    ctx: gufe.Context,
    setup_results,
    **kwargs,
) -> dict[str, Any]:
    """
    Rebuild the simulation inputs from the setup outputs and run the MD.

    Parameters
    ----------
    ctx : gufe.Context
        Execution context supplying the ``scratch`` and ``shared`` paths.
    setup_results
        Result of the setup unit. Its ``outputs`` mapping is read for the
        serialized system, the positions file, the system PDB, the step
        counts and the recorded library versions.
    **kwargs
        Accepted but not used here.

    Returns
    -------
    dict
        The unit's ``repeat_id`` and ``generation`` inputs together with
        everything returned by ``run``.
    """
    log_system_probe(logging.INFO, paths=[ctx.scratch])

    setup_outputs = setup_results.outputs

    # Refuse to continue if the software stack differs from the setup stage
    self._verify_execution_environment(setup_outputs)

    # Reconstruct the objects needed to run the unit
    system = serialization.deserialize(setup_outputs["system"])
    # the stored array is tagged with nanometers to make an openmm quantity
    positions = np.load(setup_outputs["positions"]) * omm_unit.nanometers
    topology = openmm.app.PDBFile(str(setup_outputs["system_pdb"])).getTopology()

    step_counts = {
        name: setup_outputs[name]
        for name in ("equil_steps_nvt", "equil_steps_npt", "prod_steps")
    }

    outputs = self.run(
        system=system,
        positions=positions,
        topology=topology,
        **step_counts,
        scratch_basepath=ctx.scratch,
        shared_basepath=ctx.shared,
    )

    return {
        "repeat_id": self._inputs["repeat_id"],
        "generation": self._inputs["generation"],
        **outputs,
    }
================================================
FILE: src/openfe/protocols/openmm_md/plain_md_settings.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""Settings class for plain MD Protocols using OpenMM + OpenMMTools
This module implements the settings necessary to run MD simulations using
the units defined in :mod:`openfe.protocols.openmm_md.plain_md_methods`.
"""
from gufe.settings import OpenMMSystemGeneratorFFSettings, SettingsBaseModel
from pydantic import ConfigDict, field_validator
from openfe.protocols.openmm_utils.omm_settings import (
IntegratorSettings,
MDOutputSettings,
MDSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSolvationSettings,
Settings,
)
class PlainMDProtocolSettings(Settings):
    # Settings model grouping everything the plain MD protocol reads:
    # the repeat count plus one sub-settings object per concern.
    # (Kept as comments rather than a class docstring, since pydantic
    # exposes a model's docstring as its schema description.)
    model_config = ConfigDict(arbitrary_types_allowed=True)

    protocol_repeats: int
    """
    Number of independent MD runs to perform.
    """

    @field_validator("protocol_repeats")
    @classmethod
    def must_be_positive(cls, v):
        # Reject zero and negative repeat counts with a descriptive error.
        if v <= 0:
            errmsg = f"protocol_repeats must be a positive value, got {v}."
            raise ValueError(errmsg)
        return v

    # Things for creating the systems
    forcefield_settings: OpenMMSystemGeneratorFFSettings
    partial_charge_settings: OpenFFPartialChargeSettings
    solvation_settings: OpenMMSolvationSettings

    # MD Engine things
    engine_settings: OpenMMEngineSettings

    # Sampling State defining things
    integrator_settings: IntegratorSettings

    # Simulation run settings
    simulation_settings: MDSimulationSettings

    # Simulations output settings
    output_settings: MDOutputSettings
================================================
FILE: src/openfe/protocols/openmm_rfe/__init__.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
from . import _rfe_utils
from .equil_rfe_settings import RelativeHybridTopologyProtocolSettings
from .hybridtop_protocol_results import RelativeHybridTopologyProtocolResult
from .hybridtop_protocols import RelativeHybridTopologyProtocol
from .hybridtop_units import (
HybridTopologyMultiStateAnalysisUnit,
HybridTopologyMultiStateSimulationUnit,
HybridTopologySetupUnit,
)
================================================
FILE: src/openfe/protocols/openmm_rfe/_rfe_utils/__init__.py
================================================
from . import (
lambdaprotocol,
multistate,
relative,
topologyhelpers,
)
================================================
FILE: src/openfe/protocols/openmm_rfe/_rfe_utils/lambdaprotocol.py
================================================
# Very slightly adapted from perses https://github.com/choderalab/perses
# License: MIT
# OpenFE note: eventually we aim to move this to openmmtools where possible
# turn off formatting since this is mostly vendored code
# fmt: off
import copy
import warnings
import numpy as np
from openmmtools.alchemy import AlchemicalState
class LambdaProtocol(object):
    """Protocols for perturbing each of the component energy terms in alchemical
    free energy simulations.

    TODO
    ----
    * Class needs cleaning up and made more consistent
    """

    # lambda components for each component,
    # all run from 0 -> 1 following master lambda
    default_functions = {'lambda_sterics_core':
                         lambda x: x,
                         'lambda_electrostatics_core':
                         lambda x: x,
                         'lambda_sterics_insert':
                         lambda x: 2.0 * x if x < 0.5 else 1.0,
                         'lambda_sterics_delete':
                         lambda x: 0.0 if x < 0.5 else 2.0 * (x - 0.5),
                         'lambda_electrostatics_insert':
                         lambda x: 0.0 if x < 0.5 else 2.0 * (x - 0.5),
                         'lambda_electrostatics_delete':
                         lambda x: 2.0 * x if x < 0.5 else 1.0,
                         'lambda_bonds':
                         lambda x: x,
                         'lambda_angles':
                         lambda x: x,
                         'lambda_torsions':
                         lambda x: x
                         }

    def __init__(self, functions='default', windows=10, lambda_schedule=None):
        """Instantiates lambda protocol to be used in a free energy
        calculation. Can either be user defined, by passing in a dict, or using
        one of the pregenerated sets by passing in a string 'default', 'namd',
        'quarters' or 'ele-scaled'.

        All protocols must begin and end at 0 and 1 respectively. A user
        supplied dictionary must define every term listed in
        `default_functions`; a missing term raises a ``ValueError``.

        Pre-coded options:
        default : ele and LJ terms of the old system are turned off between
        0.0 -> 0.5 ele and LJ terms of the new system are turned on between
        0.5 -> 1.0 core terms treated linearly

        quarters : 0.25 of the protocol is used in turn to individually change
        the (a) off old ele, (b) off old sterics, (c) on new sterics (d) on new
        ele core terms treated linearly

        namd : follows the protocol outlined here:
        https://pubs.acs.org/doi/full/10.1021/acs.jcim.9b00362#
        Jiang, Wei, Christophe Chipot, and Benoît Roux. "Computing Relative
        Binding Affinity of Ligands to Receptor: An Effective Hybrid
        Single-Dual-Topology Free-Energy Perturbation Approach in NAMD."
        Journal of chemical information and modeling 59.9 (2019): 3794-3802.

        ele-scaled : all terms are treated as in default, except for the old
        and new ele, which use non-linear scalings of lambda

        Parameters
        ----------
        functions : str or dict
            One of the predefined lambda protocols
            ['default','namd','quarters','ele-scaled'] or a dictionary.
            Default "default".
        windows : int
            Number of windows which this lambda schedule is intended to be used
            with. This value is used to validate the lambda function.
        lambda_schedule : list of floats
            Schedule of lambda windows to be sampled. If ``None`` will default
            to a linear spacing of windows as defined by
            ``np.linspace(0. ,1. ,windows)``. Default ``None``.

        Attributes
        ----------
        functions : dict
            Lambda protocol to be used.
        lambda_schedule : list
            Schedule of windows to be sampled.

        Raises
        ------
        ValueError
            If the schedule does not run monotonically from 0 to 1, if the
            protocol name is not recognised, if a required function is
            missing or does not start at 0 and end at 1, or if a window has
            charged atoms without sterics.
        """
        self.functions = copy.deepcopy(functions)

        # ``_validate_schedule`` already supplies the linear default for None.
        # Using its result directly (rather than re-testing the truthiness of
        # ``lambda_schedule``) keeps NumPy array schedules working.
        self.lambda_schedule = self._validate_schedule(lambda_schedule,
                                                       windows)

        if isinstance(self.functions, str):
            self.type = functions
            self.functions = self._preset_functions(functions)
        elif isinstance(self.functions, dict):
            self.type = 'user-defined'
        elif self.functions is None:
            errmsg = f"LambdaProtocol type : {functions} not recognised "
            raise ValueError(errmsg)

        self._validate_functions(n=windows)
        self._check_for_naked_charges()

    @staticmethod
    def _preset_functions(name):
        """Return a fresh function dictionary for the named preset.

        Parameters
        ----------
        name : str
            One of 'default', 'namd', 'quarters' or 'ele-scaled'.

        Raises
        ------
        ValueError
            If ``name`` is not a known preset.
        """
        if name == 'default':
            return copy.deepcopy(LambdaProtocol.default_functions)

        if name == 'namd':
            return {
                'lambda_sterics_core': lambda x: x,
                'lambda_electrostatics_core': lambda x: x,
                'lambda_sterics_insert': lambda x: (3. / 2.) * x if x < (2. / 3.) else 1.0,
                'lambda_sterics_delete': lambda x: 0.0 if x < (1. / 3.) else (x - (1. / 3.)) * (3. / 2.),
                'lambda_electrostatics_insert': lambda x: 0.0 if x < 0.5 else 2.0 * (x - 0.5),
                'lambda_electrostatics_delete': lambda x: 2.0 * x if x < 0.5 else 1.0,
                'lambda_bonds': lambda x: x,
                'lambda_angles': lambda x: x,
                'lambda_torsions': lambda x: x
            }

        if name == 'quarters':
            return {
                'lambda_sterics_core': lambda x: x,
                'lambda_electrostatics_core': lambda x: x,
                'lambda_sterics_insert': lambda x: 0. if x < 0.5 else 1 if x > 0.75 else 4 * (x - 0.5),
                'lambda_sterics_delete': lambda x: 0. if x < 0.25 else 1 if x > 0.5 else 4 * (x - 0.25),
                'lambda_electrostatics_insert': lambda x: 0. if x < 0.75 else 4 * (x - 0.75),
                'lambda_electrostatics_delete': lambda x: 4.0 * x if x < 0.25 else 1.0,
                'lambda_bonds': lambda x: x,
                'lambda_angles': lambda x: x,
                'lambda_torsions': lambda x: x
            }

        if name == 'ele-scaled':
            # Start from the defaults and override only the electrostatics
            # terms; on their own these two entries fail validation because
            # every other required term is missing.
            preset = copy.deepcopy(LambdaProtocol.default_functions)
            preset.update({
                'lambda_electrostatics_insert': lambda x: 0.0 if x < 0.5 else ((2*(x-0.5))**0.5),
                'lambda_electrostatics_delete': lambda x: (2*x)**2 if x < 0.5 else 1.0
            })
            return preset

        errmsg = f"LambdaProtocol type : {name} not recognised "
        raise ValueError(errmsg)

    @staticmethod
    def _validate_schedule(schedule, windows):
        """
        Checks that the input lambda schedule is valid.

        Rules are:
          - Must begin at 0 and end at 1
          - Must be monotonically increasing (equal neighbours are allowed)

        Parameters
        ----------
        schedule : list of floats
            The lambda schedule. If ``None`` the method returns
            ``np.linspace(0. ,1. ,windows)``.
        windows : int
            Number of windows to be sampled.

        Returns
        -------
        schedule : list of floats
            A valid lambda schedule.

        Raises
        ------
        ValueError
            If the schedule does not start at 0 and end at 1, or decreases.
        """
        if schedule is None:
            return np.linspace(0., 1., windows)

        # Check end states
        if schedule[0] != 0 or schedule[-1] != 1:
            errmsg = ("end and start lambda windows must be lambda 0 and 1 "
                      "respectively")
            raise ValueError(errmsg)

        # Check monotonically increasing
        if not np.all(np.diff(schedule) >= 0.):
            errmsg = "The lambda schedule is not monotonic"
            raise ValueError(errmsg)

        return schedule

    def _validate_functions(self, n=10):
        """Ensures that all the lambda functions adhere to the rules:
            - must begin at 0.
            - must finish at 1.
            - must be monotonically increasing (only warned about)

        Parameters
        ----------
        n : int, default 10
            number of grid points used to check monotonicity

        Raises
        ------
        ValueError
            If a required function is missing, or does not evaluate to 0 at
            lambda 0 and to 1 at lambda 1.
        """
        # Grid on which monotonicity is sampled; identical for every function
        global_lambda = np.linspace(0., 1., n)

        # every term of the default protocol must be defined
        for function in LambdaProtocol.default_functions:
            if function not in self.functions:
                # IA switched from warn to error here
                errmsg = (f"function {function} is missing from "
                          "self.lambda_functions.")
                raise ValueError(errmsg)

            sub_function = self.functions[function]

            # Check that the function starts and ends at 0 and 1 respectively
            if sub_function(0) != 0:
                raise ValueError("lambda functions must start at 0")
            if sub_function(1) != 1:
                raise ValueError("lambda functions must end at 1")

            # now validate that it's monotonic
            sub_lambda = [sub_function(lam) for lam in global_lambda]
            if not np.all(np.diff(sub_lambda) >= 0.):
                wmsg = (f"The function {function} is not monotonic as "
                        "typically expected.")
                warnings.warn(wmsg)

    def _check_for_naked_charges(self):
        """
        Checks that there are no cases where atoms have charge but no sterics.

        This avoids issues with singularities and/or excessive forces near
        the end states (even when using softcore electrostatics).

        Raises
        ------
        ValueError
            If, at any window of the schedule, a sterics term sits at its
            "off" end state while the matching electrostatics term does not.
        """
        def check_overlap(ele, sterics, global_lambda, functions, endstate):
            for lam in global_lambda:
                ele_val = functions[ele](lam)
                ster_val = functions[sterics](lam)
                # if charge > 0 and sterics == 0 raise error
                if ele_val != endstate and ster_val == endstate:
                    errmsg = ("There are states along this lambda schedule "
                              "where there are atoms with charges but no LJ "
                              f"interactions: {lam} {ele_val} {ster_val}")
                    raise ValueError(errmsg)

        # unique new atoms are "off" at 0, unique old atoms are "off" at 1
        for kind, endstate in (('insert', 0), ('delete', 1)):
            check_overlap(f'lambda_electrostatics_{kind}',
                          f'lambda_sterics_{kind}',
                          self.lambda_schedule, self.functions,
                          endstate=endstate)

    def get_functions(self):
        """Return the dictionary of lambda functions in use."""
        return self.functions

    def plot_functions(self, lambda_schedule=None):
        """
        Plot the function for ease of visualisation.

        Parameters
        ----------
        lambda_schedule : np.ndarray
            The lambda schedule to plot the function along. If ``None`` (or
            empty) plot the one stored within this class. Default ``None``.
        """
        import matplotlib.pyplot as plt

        plt.figure(figsize=(10, 5))

        # Explicit None/empty test: the truth value of a multi-element NumPy
        # array is ambiguous and would raise.
        if lambda_schedule is None or len(lambda_schedule) == 0:
            global_lambda = self.lambda_schedule
        else:
            global_lambda = lambda_schedule

        for name, function in self.functions.items():
            plt.plot(global_lambda,
                     [function(lam) for lam in global_lambda],
                     alpha=0.5, label=name)

        plt.xlabel('global lambda')
        plt.ylabel('sub-lambda')
        plt.legend()
        plt.show()
class RelativeAlchemicalState(AlchemicalState):
    """
    AlchemicalState carrying the lambda parameters needed for relative
    perturbations.

    lambda = 1 refers to ON, i.e. fully interacting while
    lambda = 0 refers to OFF, i.e. non-interacting with the system

    all lambda functions will follow from 0 -> 1 following the master lambda

    lambda*core parameters perturb linearly
    lambda_sterics_insert and lambda_electrostatics_delete perturb in the
    first half of the protocol 0 -> 0.5
    lambda_sterics_delete and lambda_electrostatics_insert perturb in the
    second half of the protocol 0.5 -> 1

    Attributes
    ----------
    lambda_sterics_core
    lambda_electrostatics_core
    lambda_sterics_insert
    lambda_sterics_delete
    lambda_electrostatics_insert
    lambda_electrostatics_delete
    """

    class _LambdaParameter(AlchemicalState._LambdaParameter):
        pass

    # One descriptor per additional lambda parameter of the hybrid system
    lambda_sterics_core = _LambdaParameter('lambda_sterics_core')
    lambda_electrostatics_core = _LambdaParameter('lambda_electrostatics_core')
    lambda_sterics_insert = _LambdaParameter('lambda_sterics_insert')
    lambda_sterics_delete = _LambdaParameter('lambda_sterics_delete')
    lambda_electrostatics_insert = _LambdaParameter(
        'lambda_electrostatics_insert')
    lambda_electrostatics_delete = _LambdaParameter(
        'lambda_electrostatics_delete')

    def set_alchemical_parameters(self, global_lambda,
                                  lambda_protocol=LambdaProtocol()):
        """Set every lambda parameter from the global lambda value.

        Each function in ``lambda_protocol.functions`` is evaluated at
        ``global_lambda`` and the result is assigned to the attribute of the
        same name.

        Parameters
        ----------
        global_lambda : float
            The master lambda value; also stored as ``self.global_lambda``.
        lambda_protocol : LambdaProtocol
            Protocol providing the per-parameter lambda functions.
        """
        self.global_lambda = global_lambda
        for parameter_name, function in lambda_protocol.functions.items():
            setattr(self, parameter_name, function(global_lambda))
================================================
FILE: src/openfe/protocols/openmm_rfe/_rfe_utils/multistate.py
================================================
#############################################################################
# HYBRID SYSTEM SAMPLERS
#############################################################################
"""
This is adapted from Perses: https://github.com/choderalab/perses/
See here for the license: https://github.com/choderalab/perses/blob/main/LICENSE
"""
# turn off formatting since this is mostly vendored code
# fmt: off
import copy
import logging
import warnings
import numpy as np
import openmm
import openmmtools.states as states
from openmm import unit
from openmmtools import cache
from openmmtools.integrators import FIREMinimizationIntegrator
from openmmtools.multistate import multistatesampler, replicaexchange, sams
from openmmtools.states import CompoundThermodynamicState, SamplerState, ThermodynamicState
from .lambdaprotocol import RelativeAlchemicalState
logger = logging.getLogger(__name__)
class HybridCompatibilityMixin:
    """
    Mixin that allows the MultistateSampler to accommodate the situation where
    unsampled endpoints have a different number of degrees of freedom.
    """

    def __init__(
        self,
        *args,
        hybrid_system: openmm.System | None = None,
        hybrid_positions: unit.Quantity | None = None,
        **kwargs
    ):
        """Store the hybrid system and positions, forwarding everything else.

        Parameters
        ----------
        hybrid_system : openmm.System, optional
            The hybrid system that ``setup`` builds its states from.
        hybrid_positions : openmm.unit.Quantity, optional
            Starting positions used by ``setup``.
        *args, **kwargs
            Passed unchanged to the next class in the MRO.
        """
        self._hybrid_system = hybrid_system
        self._hybrid_positions = hybrid_positions
        super().__init__(*args, **kwargs)

    def setup(self, reporter, lambda_protocol,
              temperature=298.15 * unit.kelvin, n_replicas=None,
              endstates=True, minimization_steps=100,
              minimization_platform="CPU"):
        """
        Setup MultistateSampler based on the input lambda protocol and number
        of replicas.

        One thermodynamic state is built per window of the lambda schedule.
        The starting positions are minimized at each window in turn, and a
        copy of the resulting sampler state is kept for that window.

        Parameters
        ----------
        reporter : OpenMM reporter
            Passed to ``self.create`` as ``storage``.
        lambda_protocol : LambdaProtocol
            The lambda protocol to be used for simulation.
        temperature : openmm.Quantity
            Simulation temperature, default to 298.15 K
        n_replicas : int
            Number of replicas to simulate. Sets to the number of lambda
            states (as defined by lambda_protocol) if ``None``, and is
            capped at that number if larger. Default ``None``.
        endstates : bool
            Whether or not to generate unsampled endstates (i.e. dispersion
            correction).
        minimization_steps : Optional[int]
            Number of steps to pre-minimize states.
        minimization_platform : str
            Platform to do the initial pre-minimization with.

        Attributes
        ----------
        n_states : int
            Number of states / windows which are to be sampled. Obtained from
            lambda_protocol.
        """
        n_states = len(lambda_protocol.lambda_schedule)

        # Template compound state; a copy is specialised per lambda window
        lambda_zero_state = RelativeAlchemicalState.from_system(self._hybrid_system)
        thermostate = ThermodynamicState(
            self._hybrid_system,
            temperature=temperature
        )
        compound_thermostate = CompoundThermodynamicState(
            thermostate,
            composable_states=[lambda_zero_state]
        )

        # Resolve the replica count against the number of available states
        if n_replicas is None:
            msg = (f"setting number of replicas to number of states: {n_states}")
            warnings.warn(msg)
            n_replicas = n_states
        elif n_replicas > n_states:
            wmsg = (f"More sampler states: {n_replicas} requested than the "
                    f"number of available states: {n_states}. Setting "
                    "the number of replicas to the number of states")
            warnings.warn(wmsg)
            n_replicas = n_states

        lambda_schedule = lambda_protocol.lambda_schedule
        if len(lambda_schedule) != n_states:
            errmsg = ("length of lambda_schedule must match the number of "
                      "states, n_states")
            raise ValueError(errmsg)

        # Begin from the hybrid positions in the system's default box
        sampler_state = SamplerState(
            self._hybrid_positions,
            box_vectors=self._hybrid_system.getDefaultPeriodicBoxVectors()
        )

        thermodynamic_state_list = []
        sampler_state_list = []
        for lambda_val in lambda_schedule:
            # thermodynamic state at this lambda value
            state_at_lambda = copy.deepcopy(compound_thermostate)
            state_at_lambda.set_alchemical_parameters(
                lambda_val,
                lambda_protocol
            )
            thermodynamic_state_list.append(state_at_lambda)

            # relax the positions at this state and keep a snapshot of them
            # Note: remove once choderalab/openmmtools#672 is completed
            minimize(state_at_lambda, sampler_state,
                     max_iterations=minimization_steps,
                     platform_name=minimization_platform)
            sampler_state_list.append(copy.deepcopy(sampler_state))

        del compound_thermostate, sampler_state

        # Thin the sampler states down to n_replicas when fewer are wanted
        if len(sampler_state_list) != n_replicas:
            # roughly evenly spaced picks; a single replica takes the first
            samples = np.linspace(0, len(sampler_state_list) - 1,
                                  n_replicas)
            idx = np.round(samples).astype(int)
            sampler_state_list = [
                state
                for position, state in enumerate(sampler_state_list)
                if position in idx
            ]

        assert len(sampler_state_list) == n_replicas

        create_kwargs = {
            "thermodynamic_states": thermodynamic_state_list,
            "sampler_states": sampler_state_list,
            "storage": reporter,
        }
        if endstates:
            # unsampled endstates derived from the first and last windows
            create_kwargs["unsampled_thermodynamic_states"] = create_endstates(
                copy.deepcopy(thermodynamic_state_list[0]),
                copy.deepcopy(thermodynamic_state_list[-1])
            )
        self.create(**create_kwargs)
class HybridRepexSampler(HybridCompatibilityMixin,
                         replicaexchange.ReplicaExchangeSampler):
    """
    Replica exchange sampler for hybrid topology systems.

    Pairs ``HybridCompatibilityMixin`` with
    ``replicaexchange.ReplicaExchangeSampler`` so that unsampled end states
    with a different number of positions are supported.
    """

    def __init__(
        self,
        *args,
        hybrid_system: openmm.System | None = None,
        hybrid_positions: unit.Quantity | None = None,
        **kwargs
    ):
        # The hybrid system and positions are forwarded by keyword; every
        # other argument is passed through untouched.
        super().__init__(
            *args,
            hybrid_system=hybrid_system,
            hybrid_positions=hybrid_positions,
            **kwargs
        )
class HybridSAMSSampler(HybridCompatibilityMixin, sams.SAMSSampler):
    """
    SAMS sampler for hybrid topology systems.

    Pairs ``HybridCompatibilityMixin`` with ``sams.SAMSSampler`` so that
    unsampled end states with a different number of positions are supported.
    """

    def __init__(
        self,
        *args,
        hybrid_system: openmm.System | None = None,
        hybrid_positions: unit.Quantity | None = None,
        **kwargs
    ):
        # The hybrid system and positions are forwarded by keyword; every
        # other argument is passed through untouched.
        super().__init__(
            *args,
            hybrid_system=hybrid_system,
            hybrid_positions=hybrid_positions,
            **kwargs
        )
class HybridMultiStateSampler(HybridCompatibilityMixin,
                              multistatesampler.MultiStateSampler):
    """
    Multistate sampler for hybrid topology systems.

    Pairs ``HybridCompatibilityMixin`` with
    ``multistatesampler.MultiStateSampler`` so that unsampled end states
    with a different number of positions are supported.
    """

    def __init__(
        self,
        *args,
        hybrid_system: openmm.System | None = None,
        hybrid_positions: unit.Quantity | None = None,
        **kwargs
    ):
        # The hybrid system and positions are forwarded by keyword; every
        # other argument is passed through untouched.
        super().__init__(
            *args,
            hybrid_system=hybrid_system,
            hybrid_positions=hybrid_positions,
            **kwargs
        )
def create_endstates(first_thermostate, last_thermostate):
    """
    Build the two unsampled endstates (lambda = 0 and lambda = 1).

    For each endstate the following is done on the system returned by
    ``get_system()``:

    1. The NonbondedForce is switched to LJPME with an Ewald error
       tolerance of 1e-5.
    2. Particles whose NonbondedForce epsilon is zero but which carry a
       non-zero epsilon in the CustomNonbondedForce get their LJ
       parameters (interpolated at the endstate lambda) written back to
       the NonbondedForce.
    3. The CustomNonbondedForce is removed.
    4. Every global parameter whose name starts with ``lambda_`` has its
       default value set to the endstate lambda.

    Parameters
    ----------
    first_thermostate : openmmtools.states.CompoundThermodynamicState
        The first thermodynamic state; used for the lambda = 0 endstate.
    last_thermostate : openmmtools.states.CompoundThermodynamicState
        The last thermodynamic state; used for the lambda = 1 endstate.

    Returns
    -------
    unsampled_endstates : list of openmmtools.states.ThermodynamicState
        The two corrected endstates, in (first, last) order.
    """
    # Loop-invariant settings
    energy_unit = unit.kilocalories_per_mole
    ljpme = openmm.NonbondedForce.LJPME
    TIGHT_PME_TOLERANCE = 1.0e-5

    endpoints = ((0., first_thermostate), (1., last_thermostate))

    unsampled_endstates = []
    for master_lambda, endstate in endpoints:
        dispersion_system = endstate.get_system()

        # Index the forces by class name (there must be only one of each)
        forces = {}
        for force in dispersion_system.getForces():
            forces[force.__class__.__name__] = force

        nonbonded = forces['NonbondedForce']
        nonbonded.setNonbondedMethod(ljpme)
        nonbonded.setEwaldErrorTolerance(TIGHT_PME_TOLERANCE)

        # Move alchemical LJ sites from the CustomNonbondedForce back to
        # the NonbondedForce
        for particle_index in range(nonbonded.getNumParticles()):
            charge, sigma, epsilon = nonbonded.getParticleParameters(
                particle_index)
            (sigmaA, epsilonA,
             sigmaB, epsilonB,
             unique_old, unique_new) = forces[
                'CustomNonbondedForce'].getParticleParameters(particle_index)

            zeroed_in_nonbonded = (epsilon / energy_unit == 0.0)
            if zeroed_in_nonbonded and ((epsilonA > 0.0) or (epsilonB > 0.0)):
                sigma = (1 - master_lambda) * sigmaA + master_lambda * sigmaB
                epsilon = ((1 - master_lambda) * epsilonA
                           + master_lambda * epsilonB)
                nonbonded.setParticleParameters(
                    particle_index, charge, sigma, epsilon)

        # Drop the CustomNonbondedForce now that its alchemical particles
        # have been moved out of it
        for force_index, force in enumerate(dispersion_system.getForces()):
            if force.__class__.__name__ == 'CustomNonbondedForce':
                custom_nonbonded_force_index = force_index
                break
        dispersion_system.removeForce(custom_nonbonded_force_index)

        # Pin every lambda_* global parameter to the endstate value
        for force in dispersion_system.getForces():
            if not hasattr(force, 'getNumGlobalParameters'):
                continue
            for parameter_index in range(force.getNumGlobalParameters()):
                name = force.getGlobalParameterName(parameter_index)
                if name[0:7] == 'lambda_':
                    force.setGlobalParameterDefaultValue(parameter_index,
                                                         master_lambda)

        unsampled_endstates.append(ThermodynamicState(
            dispersion_system, temperature=endstate.temperature))

    return unsampled_endstates
def minimize(thermodynamic_state: states.ThermodynamicState,
             sampler_state: states.SamplerState,
             max_iterations: int | None = 100,
             platform_name: str = "CPU") -> states.SamplerState:
    """
    Adapted from perses.dispersed.feptasks.minimize

    Minimize the given system and state, up to a maximum number of steps.
    The sampler state is updated in place; the object that is returned is
    the very same ``sampler_state`` that was passed in, not a copy.

    Parameters
    ----------
    thermodynamic_state : openmmtools.states.ThermodynamicState
        The state at which the system could be minimized
    sampler_state : openmmtools.states.SamplerState
        The starting state at which to minimize the system.
    max_iterations : Optional[int]
        The maximum number of minimization steps. Default is 100.
        If ``None``, no minimization is carried out.
    platform_name : str
        The OpenMM platform name to carry out the minimization with.

    Returns
    -------
    sampler_state : openmmtools.states.SamplerState
        The input sampler state, holding the positions and accompanying
        state following minimization (unchanged if ``max_iterations`` is
        ``None``).
    """
    # Only run a minimization if max_iterations is not None
    if max_iterations is None:
        return sampler_state

    # we won't take any steps, so use a simple integrator
    integrator = openmm.VerletIntegrator(1.0)
    platform = openmm.Platform.getPlatformByName(platform_name)
    dummy_cache = cache.DummyContextCache(platform=platform)
    context, integrator = dummy_cache.get_context(
        thermodynamic_state, integrator
    )
    try:
        sampler_state.apply_to_context(
            context, ignore_velocities=True
        )
        openmm.LocalEnergyMinimizer.minimize(
            context, maxIterations=max_iterations
        )
        sampler_state.update_from_context(context)
    finally:
        # Drop the local references to the context even if minimization fails
        del context, integrator, dummy_cache

    return sampler_state
================================================
FILE: src/openfe/protocols/openmm_rfe/_rfe_utils/relative.py
================================================
# This code is a slightly modified version of the HybridTopologyFactory code
# from https://github.com/choderalab/perses
# The eventual goal is to move a version of this towards openmmtools
# LICENSE: MIT
# turn off formatting since this is mostly vendored code
# fmt: off
import copy
import itertools
import logging
import mdtraj as mdt
import numpy as np
import openmm
from openmm import app, unit
# OpenMM constant for Coulomb interactions (implicitly in md_unit_system units)
from openmmtools.constants import ONE_4PI_EPS0
logger = logging.getLogger(__name__)
class HybridTopologyFactory:
"""
This class generates a hybrid topology based on two input systems and an
atom mapping. For convenience the states are called "old" and "new"
respectively, defining the starting and end states along the alchemical
transformation.
The input systems are assumed to have:
1. The total number of molecules
2. The same coordinates for equivalent atoms
Atoms in the resulting hybrid system are treated as being from one
of four possible types:
unique_old_atom : These atoms are not mapped and only present in the old
system. Their interactions will be on for lambda=0, off for lambda=1
unique_new_atom : These atoms are not mapped and only present in the new
system. Their interactions will be off for lambda=0, on for lambda=1
core_atom : These atoms are mapped between the two end states, and are
part of a residue that is changing alchemically. Their interactions
will be those corresponding to the old system at lambda=0, and those
corresponding to the new system at lambda=1
environment_atom : These atoms are mapped between the two end states, and
are not part of a residue undergoing an alchemical change. Their
interactions are always on and are alchemically unmodified.
Properties
----------
hybrid_system : openmm.System
The hybrid system for simulation
new_to_hybrid_atom_map : dict of int : int
The mapping of new system atoms to hybrid atoms
old_to_hybrid_atom_map : dict of int : int
The mapping of old system atoms to hybrid atoms
hybrid_positions : [n, 3] np.ndarray
The positions of the hybrid system
hybrid_topology : mdtraj.Topology
The topology of the hybrid system
omm_hybrid_topology : openmm.app.Topology
The OpenMM topology object corresponding to the hybrid system
.. warning :: This API is experimental and subject to change.
Notes
-----
* Logging has been removed and will be revamped at a later date.
* The ability to define custom functions has been removed for now.
* Neglected angle terms have been removed for now.
* RMSD restraint option has been removed for now.
* Endstate support has been removed for now.
* Bond softening has been removed for now.
* Unused InteractionGroup code paths have been removed.
TODO
----
* Document how positions for hybrid system are constructed.
* Allow support for annealing in omitted terms.
* Implement omitted terms (this was not available in the original class).
"""
def __init__(self,
             old_system, old_positions, old_topology,
             new_system, new_positions, new_topology,
             old_to_new_atom_map, old_to_new_core_atom_map,
             use_dispersion_correction=False,
             softcore_alpha=0.5,
             softcore_LJ_v2=True,
             softcore_LJ_v2_alpha=0.85,
             interpolate_old_and_new_14s=False):
    """
    Initialize the Hybrid topology factory.

    Besides storing the inputs, this constructor currently builds the
    whole hybrid system (particles, box, constraints, virtual sites and
    all force terms), the hybrid positions and the hybrid topologies.

    Parameters
    ----------
    old_system : openmm.System
        OpenMM system defining the "old" (i.e. starting) state.
    old_positions : [n,3] np.ndarray of float
        The positions of the "old system".
    old_topology : openmm.Topology
        OpenMM topology defining the "old" state.
    new_system: openmm.System
        OpenMM system defining the "new" (i.e. end) state.
    new_positions : [m,3] np.ndarray of float
        The positions of the "new system"
    new_topology : openmm.Topology
        OpenMM topology defining the "new" state.
    old_to_new_atom_map : dict of int : int
        Dictionary of corresponding atoms between the old and new systems.
        Unique atoms are not included in this atom map.
    old_to_new_core_atom_map : dict of int : int
        Dictionary of corresponding atoms between the alchemical "core
        atoms" (i.e. residues which are changing) between the old and
        new systems.
    use_dispersion_correction : bool, default False
        Whether to use the long range correction in the custom sterics
        force. This can be very expensive for NCMC.
    softcore_alpha: float, default 0.5
        "alpha" parameter of softcore sterics. Must lie in [0, 1].
    softcore_LJ_v2 : bool, default True
        Implement the softcore LJ as defined by Gapsys et al. JCTC 2012.
    softcore_LJ_v2_alpha : float, default 0.85
        Softcore alpha parameter for LJ v2. Only checked (must lie in
        [0, 1]) and stored when ``softcore_LJ_v2`` is True.
    interpolate_old_and_new_14s : bool, default False
        Whether to turn off interactions for new exceptions (not just
        1,4s) at lambda = 0 and old exceptions at lambda = 1; if False,
        they are present in the nonbonded force.

    Raises
    ------
    AssertionError
        If a softcore alpha value is outside of [0, 1].
    """
    # Assign system positions and force
    # IA - Are deep copies really needed here?
    self._old_system = copy.deepcopy(old_system)
    self._old_positions = old_positions
    self._old_topology = old_topology
    self._new_system = copy.deepcopy(new_system)
    self._new_positions = new_positions
    self._new_topology = new_topology
    self._hybrid_system_forces = dict()

    # Set mappings (full, core, and env maps)
    # NOTE: this reads self._old_system / self._new_system, so it has to
    # come after the assignments above
    self._set_mappings(old_to_new_atom_map, old_to_new_core_atom_map)

    # Other options
    self._use_dispersion_correction = use_dispersion_correction
    self._interpolate_14s = interpolate_old_and_new_14s

    # Softcore options
    self._softcore_alpha = softcore_alpha
    self._check_bounds(softcore_alpha, "softcore_alpha")  # [0,1] check

    self._softcore_LJ_v2 = softcore_LJ_v2
    if self._softcore_LJ_v2:
        self._check_bounds(softcore_LJ_v2_alpha, "softcore_LJ_v2_alpha")
        self._softcore_LJ_v2_alpha = softcore_LJ_v2_alpha

    # TODO: end __init__ here and move everything else to
    # create_hybrid_system() or equivalent

    # Populates self._old_system_forces / self._new_system_forces, which
    # the exception dictionaries below depend on
    self._check_and_store_system_forces()

    logger.info("Creating hybrid system")
    # Create empty system that will become the hybrid system
    self._hybrid_system = openmm.System()

    # Add particles to system
    self._add_particles()

    # Add box + barostat
    self._handle_box()

    # Assign atoms to one of the classes described in the class docstring
    # Renamed from original _determine_atom_classes
    self._set_atom_classes()

    # Construct dictionary of exceptions in old and new systems
    self._old_system_exceptions = self._generate_dict_from_exceptions(
        self._old_system_forces['NonbondedForce'])
    self._new_system_exceptions = self._generate_dict_from_exceptions(
        self._new_system_forces['NonbondedForce'])

    # check for exceptions clashes between unique and env atoms
    self._validate_disjoint_sets()

    logger.info("Setting force field terms")
    # Copy constraints, checking to make sure they are not changing
    self._handle_constraints()

    # Copy over relevant virtual sites - pick up refactor from here
    self._handle_virtual_sites()

    # TODO - move to a single method call? Would be good to group these
    # Call each of the force methods to add the corresponding force terms
    # and prepare the forces:
    self._add_bond_force_terms()

    self._add_angle_force_terms()

    self._add_torsion_force_terms()

    # NOTE(review): 'NonbondedForce' is already indexed unconditionally
    # above, so this flag looks like it can only ever be True - confirm
    has_nonbonded_force = ('NonbondedForce' in self._old_system_forces or
                           'NonbondedForce' in self._new_system_forces)

    if has_nonbonded_force:
        self._add_nonbonded_force_terms()

    # Call each force preparation method to generate the actual
    # interactions that we need:
    logger.info("Adding forces")
    self._handle_harmonic_bonds()

    self._handle_harmonic_angles()

    self._handle_periodic_torsion_force()

    # add cmap terms if possible
    self._handle_cmap_torsion_force()

    if has_nonbonded_force:
        self._handle_nonbonded()
        # Only needed when at least one end state defines exceptions
        if not (len(self._old_system_exceptions.keys()) == 0 and
                len(self._new_system_exceptions.keys()) == 0):
            self._handle_old_new_exceptions()

    # Get positions for the hybrid
    self._hybrid_positions = self._compute_hybrid_positions()

    # Get an MDTraj topology for writing
    self._hybrid_topology = self._create_mdtraj_topology()
    self._omm_hybrid_topology = self._create_hybrid_topology()
    logger.info("Hybrid system created")
@staticmethod
def _verify_cmap_compatibility(
cmap_old: openmm.CMAPTorsionForce,
cmap_new: openmm.CMAPTorsionForce,
) -> tuple[
int,
int,
int,
int,
]:
"""
Verify CMAPTorsionForce compatibility between two systems.
Parameters
----------
cmap_old : openmm.CMAPTorsionForce
CMAPTorsionForce from the old system
cmap_new : openmm.CMAPTorsionForce
CMAPTorsionForce from the new system
Returns
-------
tuple
(old_num_maps, new_num_maps, old_num_torsions, new_num_torsions)
four integers describing the number of maps and
torsions in each force.
Raises
------
RuntimeError
If only one of the forces is present, or if the number of maps or the
number of torsions differs between the two forces.
"""
logger.info("CMAPTorsionForce found checking compatibility")
# some quick checks on compatibility like the number of maps and total number of terms
old_num_maps = cmap_old.getNumMaps()
new_num_maps = cmap_new.getNumMaps()
if old_num_maps != new_num_maps:
raise RuntimeError(
f"Incompatible CMAPTorsionForce between end states expected to have same number of maps, "
f"found old: {old_num_maps} and new: {new_num_maps}")
old_num_torsions = cmap_old.getNumTorsions()
new_num_torsions = cmap_new.getNumTorsions()
if old_num_torsions != new_num_torsions:
raise RuntimeError(
f"Incompatible CMAPTorsionForce between end states expected to have same number of torsions, "
f"found old: {old_num_torsions} and new: {new_num_torsions}")
return old_num_maps, new_num_maps, old_num_torsions, new_num_torsions
def _handle_cmap_torsion_force(self):
"""
This method does the following in order:
- Some basic checks that the CMAPTorsionForce exists in both old/new systems.
- Adds the CMAPTorsionForce from the old system
- Checks that the new system CMAPTorsionForce terms are equal to the old system's (we do not allow for alchemically changing CMAP terms).
"""
cmap_old = self._old_system_forces.get("CMAPTorsionForce", None)
cmap_new = self._new_system_forces.get("CMAPTorsionForce", None)
# if only one has cmap raise an error
if (cmap_new is None) ^ (cmap_old is None):
raise RuntimeError(f"Inconsistent CMAPTorsionForce between end states expected to be present in both"
f"but found in old: {bool(cmap_old)} and new: {bool(cmap_new)}")
if cmap_new == cmap_old is None:
logger.info("No CMAPTorsionForce found. Skipping adding force.")
return
# verify compatibility and extract numbers of maps and torsions
(
old_num_maps,
new_num_maps,
old_num_torsions,
new_num_torsions
) = self._verify_cmap_compatibility(
cmap_old, cmap_new
)
logger.info("Adding CMAPTorsionForce to hybrid system")
# start looping through the old system terms and add them to the hybrid system
# track the terms we add so we can cross compare with the new system and also make sure we don't hit
# an index in the alchemical region
hybrid_cmap_force = openmm.CMAPTorsionForce()
self._hybrid_system.addForce(hybrid_cmap_force)
self._hybrid_system_forces["cmap_torsion_force"] = hybrid_cmap_force
old_system_maps = {}
old_system_terms = {}
logger.info("Adding CMAP forces")
# add all the old maps
for i in range(old_num_maps):
size, energy = cmap_old.getMapParameters(i)
old_system_maps[i] = (size, energy)
# also add the map to the hybrid system
hybrid_cmap_force.addMap(size, energy)
logger.info("Adding CMAP force terms")
# now add the terms we need to map from the old to the new index
old_to_hybrid_index = self._old_to_hybrid_map
new_to_hybrid_index = self._new_to_hybrid_map
for i in range(old_num_torsions):
# get the parameters for the torsion using the same notation as OpenMM
map_index, a1, a2, a3, a4, b1, b2, b3, b4 = cmap_old.getTorsionParameters(i)
atom_ids = [a1, a2, a3, a4, b1, b2, b3, b4]
# map to hybrid indices
hybrid_atom_ids = [old_to_hybrid_index[a_id] for a_id in atom_ids]
# add to the hybrid system using the hybrid index
hybrid_cmap_force.addTorsion(map_index, *hybrid_atom_ids)
# track the atoms we add in the hybrid system to cross compare with new system
old_system_terms[tuple(hybrid_atom_ids)] = map_index
# gather all alchemical atoms, use a copy so we don't change the groups
alchemical_atoms = self._atom_classes["core_atoms"].copy()
alchemical_atoms.update(self._atom_classes["unique_old_atoms"], self._atom_classes["unique_new_atoms"])
# check if any of the atoms added are in the alchemical region
old_added_atoms = {atom_id for atoms in old_system_terms.keys() for atom_id in atoms}
if overlap_atoms := alchemical_atoms.intersection(old_added_atoms):
raise RuntimeError(
f"Incompatible CMAPTorsionForce term found in alchemical region for old system atoms {overlap_atoms}")
# now loop over the new system force and check the terms are compatible
# we expect to add no new terms
for i in range(new_num_maps):
size, energy = cmap_new.getMapParameters(i)
if (size, energy) != old_system_maps[i]:
raise RuntimeError(f"Incompatible CMAPTorsionForce map parameters found between end states for map {i} "
f"expected {old_system_maps[i]} found {(size, energy)}")
for i in range(new_num_torsions):
map_index, a1, a2, a3, a4, b1, b2, b3, b4 = cmap_new.getTorsionParameters(i)
atom_ids = [a1, a2, a3, a4, b1, b2, b3, b4]
# map to hybrid indices
hybrid_atom_ids = [new_to_hybrid_index[a_id] for a_id in atom_ids]
# check its in the old system terms
if tuple(hybrid_atom_ids) not in old_system_terms.keys():
raise RuntimeError(
f"Incompatible CMAPTorsionForce term found between end states for atoms {hybrid_atom_ids} "
f"not found in old system terms.")
# check the map index is the same
if map_index != old_system_terms[tuple(hybrid_atom_ids)]:
raise RuntimeError(
f"Incompatible CMAPTorsionForce map index found between end states for atoms {hybrid_atom_ids} "
f"expected {old_system_terms[tuple(hybrid_atom_ids)]} found {map_index}")
logger.info("CMAPTorsionForce added to the hybrid system")
@staticmethod
def _check_bounds(value, varname, minmax=(0, 1)):
"""
Convenience method to check the bounds of a value.
Parameters
----------
value : float
Value to evaluate.
varname : str
Name of value to raise in error message
minmax : tuple
Two element tuple with the lower and upper bounds to check.
Raises
------
AssertionError
If value is lower or greater than bounds.
"""
if value < minmax[0] or value > minmax[1]:
raise AssertionError(f"{varname} is not in {minmax}")
@staticmethod
def _invert_dict(dictionary):
"""
Convenience method to invert a dictionary (since we do it so often).
Parameters:
----------
dictionary : dict
Dictionary you want to invert
"""
return {v: k for k, v in dictionary.items()}
def _set_mappings(self, old_to_new_map, core_old_to_new_map):
"""
Parameters
----------
old_to_new_map : dict of int : int
Dictionary mapping atoms between the old and new systems.
Notes
-----
* For now this directly sets the system, core and env old_to_new_map,
new_to_old_map, an empty new_to_hybrid_map and an empty
old_to_hybrid_map. In the future this will be moved to the one
dictionary to make things a lot less confusing.
"""
self._old_to_new_map = old_to_new_map
self._core_old_to_new_map = core_old_to_new_map
self._new_to_old_map = self._invert_dict(old_to_new_map)
self._core_new_to_old_map = self._invert_dict(core_old_to_new_map)
self._old_to_hybrid_map = {}
self._new_to_hybrid_map = {}
# Get unique atoms
# old system first
self._unique_old_atoms = []
for particle_idx in range(self._old_system.getNumParticles()):
if particle_idx not in self._old_to_new_map.keys():
self._unique_old_atoms.append(particle_idx)
self._unique_new_atoms = []
for particle_idx in range(self._new_system.getNumParticles()):
if particle_idx not in self._new_to_old_map.keys():
self._unique_new_atoms.append(particle_idx)
# Get env atoms (i.e. atoms mapped not in core)
self._env_old_to_new_map = {}
for key, value in old_to_new_map.items():
if key not in self._core_old_to_new_map.keys():
self._env_old_to_new_map[key] = value
self._env_new_to_old_map = self._invert_dict(self._env_old_to_new_map)
# IA - Internal check for now (move to test later)
num_env = len(self._env_old_to_new_map.keys())
num_core = len(self._core_old_to_new_map.keys())
num_total = len(self._old_to_new_map.keys())
assert num_env + num_core == num_total
def _check_and_store_system_forces(self):
"""
Conveniently stores the system forces and checks that no unknown
forces exist.
"""
def _check_unknown_forces(forces, system_name):
# TODO: double check that CMMotionRemover is ok being here
known_forces = {
'HarmonicBondForce', 'HarmonicAngleForce',
'PeriodicTorsionForce', 'NonbondedForce',
'MonteCarloBarostat', 'CMMotionRemover',
'CMAPTorsionForce', 'MonteCarloMembraneBarostat',
}
force_names = forces.keys()
unknown_forces = set(force_names) - set(known_forces)
if unknown_forces:
errmsg = (f"Unknown forces {unknown_forces} encountered in "
f"{system_name} system")
raise ValueError(errmsg)
# Prepare dicts of forces, which will be useful later
# TODO: Store this as self._system_forces[name], name in ('old',
# 'new', 'hybrid') for compactness
self._old_system_forces = {type(force).__name__: force for force in
self._old_system.getForces()}
_check_unknown_forces(self._old_system_forces, 'old')
self._new_system_forces = {type(force).__name__: force for force in
self._new_system.getForces()}
_check_unknown_forces(self._new_system_forces, 'new')
# TODO: check if this is actually used much, otherwise ditch it
# Get and store the nonbonded method from the system:
self._nonbonded_method = self._old_system_forces['NonbondedForce'].getNonbondedMethod()
def _add_particles(self):
"""
Adds particles to the hybrid system.
This does not copy over interactions, but does copy over the masses.
Note
----
* If there is a difference in masses between the old and new systems
the average mass of the two is used.
TODO
----
* Review influence of lack of mass scaling.
"""
# Begin by copying all particles in the old system
for particle_idx in range(self._old_system.getNumParticles()):
mass_old = self._old_system.getParticleMass(particle_idx)
if particle_idx in self._old_to_new_map.keys():
particle_idx_new_system = self._old_to_new_map[particle_idx]
mass_new = self._new_system.getParticleMass(
particle_idx_new_system)
# Take the average of the masses if the atom is mapped
particle_mass = (mass_old + mass_new) / 2
else:
particle_mass = mass_old
hybrid_idx = self._hybrid_system.addParticle(particle_mass)
self._old_to_hybrid_map[particle_idx] = hybrid_idx
# If the particle index in question is mapped, make sure to add it
# to the new to hybrid map as well.
if particle_idx in self._old_to_new_map.keys():
self._new_to_hybrid_map[particle_idx_new_system] = hybrid_idx
# Next, add the remaining unique atoms from the new system to the
# hybrid system and map accordingly.
for particle_idx in self._unique_new_atoms:
particle_mass = self._new_system.getParticleMass(particle_idx)
hybrid_idx = self._hybrid_system.addParticle(particle_mass)
self._new_to_hybrid_map[particle_idx] = hybrid_idx
# Create the opposite atom maps for later use (nonbonded processing)
self._hybrid_to_old_map = self._invert_dict(self._old_to_hybrid_map)
self._hybrid_to_new_map = self._invert_dict(self._new_to_hybrid_map)
def _handle_box(self):
"""
Copies over the barostat and box vectors as necessary.
"""
# Check that if there is a barostat in the old system,
# it is added to the hybrid system
present_barostat = [
i for i in self._old_system_forces.keys()
if i in ["MonteCarloBarostat", "MonteCarloMembraneBarostat"]
]
if len(present_barostat) == 1:
barostat = copy.deepcopy(
self._old_system_forces[present_barostat[0]])
self._hybrid_system.addForce(barostat)
elif len(present_barostat) > 1:
errmsg = "More than 1 barostat are present which is not supported"
raise ValueError(errmsg)
# Copy over the box vectors from the old system
box_vectors = self._old_system.getDefaultPeriodicBoxVectors()
self._hybrid_system.setDefaultPeriodicBoxVectors(*box_vectors)
def _set_atom_classes(self):
"""
This method determines whether each atom belongs to unique old,
unique new, core, or environment, as defined in the class docstring.
All indices are indices in the hybrid system.
"""
self._atom_classes = {'unique_old_atoms': set(),
'unique_new_atoms': set(),
'core_atoms': set(),
'environment_atoms': set()}
# First, find the unique old atoms
for atom_idx in self._unique_old_atoms:
hybrid_idx = self._old_to_hybrid_map[atom_idx]
self._atom_classes['unique_old_atoms'].add(hybrid_idx)
# Then the unique new atoms
for atom_idx in self._unique_new_atoms:
hybrid_idx = self._new_to_hybrid_map[atom_idx]
self._atom_classes['unique_new_atoms'].add(hybrid_idx)
# The core atoms:
for new_idx, old_idx in self._core_new_to_old_map.items():
new_to_hybrid_idx = self._new_to_hybrid_map[new_idx]
old_to_hybrid_idx = self._old_to_hybrid_map[old_idx]
if new_to_hybrid_idx != old_to_hybrid_idx:
errmsg = (f"there is an index collision in hybrid indices of "
f"the core atom map: {self._core_new_to_old_map}")
raise AssertionError(errmsg)
self._atom_classes['core_atoms'].add(new_to_hybrid_idx)
# The environment atoms:
for new_idx, old_idx in self._env_new_to_old_map.items():
new_to_hybrid_idx = self._new_to_hybrid_map[new_idx]
old_to_hybrid_idx = self._old_to_hybrid_map[old_idx]
if new_to_hybrid_idx != old_to_hybrid_idx:
errmsg = (f"there is an index collion in hybrid indices of "
f"the environment atom map: "
f"{self._env_new_to_old_map}")
raise AssertionError(errmsg)
self._atom_classes['environment_atoms'].add(new_to_hybrid_idx)
@staticmethod
def _generate_dict_from_exceptions(force):
"""
This is a utility function to generate a dictionary of the form
(particle1_idx, particle2_idx) : [exception parameters].
This will facilitate access and search of exceptions.
Parameters
----------
force : openmm.NonbondedForce object
a force containing exceptions
Returns
-------
exceptions_dict : dict
Dictionary of exceptions
"""
exceptions_dict = {}
for exception_index in range(force.getNumExceptions()):
[index1, index2, chargeProd, sigma, epsilon] = force.getExceptionParameters(exception_index)
exceptions_dict[(index1, index2)] = [chargeProd, sigma, epsilon]
return exceptions_dict
def _validate_disjoint_sets(self):
"""
Conduct a sanity check to make sure that the hybrid maps of the old
and new system exception dict keys do not contain both environment
and unique_old/new atoms.
TODO: repeated code - condense
"""
for old_indices in self._old_system_exceptions.keys():
hybrid_indices = (self._old_to_hybrid_map[old_indices[0]],
self._old_to_hybrid_map[old_indices[1]])
old_env_intersection = set(old_indices).intersection(
self._atom_classes['environment_atoms'])
if old_env_intersection:
if set(old_indices).intersection(
self._atom_classes['unique_old_atoms']
):
errmsg = (f"old index exceptions {old_indices} include "
"unique old and environment atoms, which is "
"disallowed")
raise AssertionError(errmsg)
for new_indices in self._new_system_exceptions.keys():
hybrid_indices = (self._new_to_hybrid_map[new_indices[0]],
self._new_to_hybrid_map[new_indices[1]])
new_env_intersection = set(hybrid_indices).intersection(
self._atom_classes['environment_atoms'])
if new_env_intersection:
if set(hybrid_indices).intersection(
self._atom_classes['unique_new_atoms']
):
errmsg = (f"new index exceptions {new_indices} include "
"unique new and environment atoms, which is "
"disallowed")
raise AssertionError
def _handle_constraints(self):
"""
This method adds relevant constraints from the old and new systems.
First, all constraints from the old systenm are added.
Then, constraints to atoms unique to the new system are added.
TODO: condense duplicated code
"""
# lengths of constraints already added
constraint_lengths = dict()
# old system
hybrid_map = self._old_to_hybrid_map
for const_idx in range(self._old_system.getNumConstraints()):
at1, at2, length = self._old_system.getConstraintParameters(
const_idx)
hybrid_atoms = tuple(sorted([hybrid_map[at1], hybrid_map[at2]]))
if hybrid_atoms not in constraint_lengths.keys():
self._hybrid_system.addConstraint(hybrid_atoms[0],
hybrid_atoms[1], length)
constraint_lengths[hybrid_atoms] = length
else:
if constraint_lengths[hybrid_atoms] != length:
raise AssertionError('constraint length is changing')
# new system
hybrid_map = self._new_to_hybrid_map
for const_idx in range(self._new_system.getNumConstraints()):
at1, at2, length = self._new_system.getConstraintParameters(
const_idx)
hybrid_atoms = tuple(sorted([hybrid_map[at1], hybrid_map[at2]]))
if hybrid_atoms not in constraint_lengths.keys():
self._hybrid_system.addConstraint(hybrid_atoms[0],
hybrid_atoms[1], length)
constraint_lengths[hybrid_atoms] = length
else:
if constraint_lengths[hybrid_atoms] != length:
raise AssertionError('constraint length is changing')
@staticmethod
def _copy_threeparticleavg(atm_map, env_atoms, vs):
"""
Helper method to copy a ThreeParticleAverageSite virtual site
from two mapped Systems.
Parameters
----------
atm_map : dict[int, int]
The atom map correspondence between the two Systems.
env_atoms: set[int]
A list of environment atoms for the target System. This
checks that no alchemical atoms are being tied to.
vs : openmm.ThreeParticleAverageSite
Returns
-------
openmm.ThreeParticleAverageSite
"""
particles = {}
weights = {}
for i in range(vs.getNumParticles()):
particles[i] = atm_map[vs.getParticle(i)]
weights[i] = vs.getWeight(i)
if not all(i in env_atoms for i in particles.values()):
errmsg = ("Virtual sites bound to non-environment atoms "
"are not supported")
raise ValueError(errmsg)
return openmm.ThreeParticleAverageSite(
particles[0], particles[1], particles[2],
weights[0], weights[1], weights[2],
)
def _handle_virtual_sites(self):
"""
Ensure that all virtual sites in old and new system are copied over to
the hybrid system. Note that we do not support virtual sites in the
changing region.
TODO - remerge into a single loop
TODO - check that it's fine to double count here (even so, there's
an optimisation that could be done here...)
"""
# old system
# Loop through virtual sites
for particle_idx in range(self._old_system.getNumParticles()):
if self._old_system.isVirtualSite(particle_idx):
# If it's a virtual site, make sure it is not in the unique or
# core atoms, since this is currently unsupported
hybrid_idx = self._old_to_hybrid_map[particle_idx]
if hybrid_idx not in self._atom_classes['environment_atoms']:
errmsg = ("Virtual sites in changing residue are "
"unsupported.")
raise ValueError(errmsg)
else:
virtual_site = self._old_system.getVirtualSite(
particle_idx)
if isinstance(
virtual_site, openmm.ThreeParticleAverageSite):
vs_copy = self._copy_threeparticleavg(
self._old_to_hybrid_map,
self._atom_classes['environment_atoms'],
virtual_site,
)
else:
errmsg = ("Unsupported VirtualSite "
f"class: {virtual_site}")
raise ValueError(errmsg)
self._hybrid_system.setVirtualSite(hybrid_idx,
vs_copy)
# new system - there should be nothing left to add
# Loop through virtual sites
for particle_idx in range(self._new_system.getNumParticles()):
if self._new_system.isVirtualSite(particle_idx):
# If it's a virtual site, make sure it is not in the unique or
# core atoms, since this is currently unsupported
hybrid_idx = self._new_to_hybrid_map[particle_idx]
if hybrid_idx not in self._atom_classes['environment_atoms']:
errmsg = ("Virtual sites in changing residue are "
"unsupported.")
raise ValueError(errmsg)
else:
if not self._hybrid_system.isVirtualSite(hybrid_idx):
errmsg = ("Environment virtual site in new system "
"found not copied from old system")
raise ValueError(errmsg)
def _add_bond_force_terms(self):
    """
    Create the two (still empty) bond forces of the hybrid system.

    * ``core_bond_force`` : an ``openmm.CustomBondForce`` whose spring
      constant and equilibrium length are linearly interpolated by the
      global parameter ``lambda_bonds``.
    * ``standard_bond_force`` : a plain ``openmm.HarmonicBondForce``.

    Both forces are added to ``self._hybrid_system`` and registered in
    ``self._hybrid_system_forces``. No bonds are added here; that is
    left to another method.

    Notes
    -----
    * User defined functions have been removed for now.
    """
    interpolated_expression = ''.join((
        '(K/2)*(r-length)^2;',
        # linear interpolation of the spring constant
        'K = (1-lambda_bonds)*K1 + lambda_bonds*K2;',
        # linear interpolation of the equilibrium bond length
        'length = (1-lambda_bonds)*length1 + lambda_bonds*length2;',
    ))
    interpolated_bonds = openmm.CustomBondForce(interpolated_expression)

    # Registration order defines the layout of each bond's parameter
    # list: old length, old spring constant, new length, new spring
    # constant.
    for per_bond_name in ('length1', 'K1', 'length2', 'K2'):
        interpolated_bonds.addPerBondParameter(per_bond_name)
    interpolated_bonds.addGlobalParameter('lambda_bonds', 0.0)

    self._hybrid_system.addForce(interpolated_bonds)
    self._hybrid_system_forces['core_bond_force'] = interpolated_bonds

    # Bonds involving environment or unique atoms are never scaled, so
    # an ordinary harmonic bond force is enough for them.
    unscaled_bonds = openmm.HarmonicBondForce()
    self._hybrid_system.addForce(unscaled_bonds)
    self._hybrid_system_forces['standard_bond_force'] = unscaled_bonds
def _add_angle_force_terms(self):
    """
    Create the two (still empty) angle forces of the hybrid system.

    * ``core_angle_force`` : an ``openmm.CustomAngleForce`` whose spring
      constant and equilibrium angle are linearly interpolated by the
      global parameter ``lambda_angles``.
    * ``standard_angle_force`` : a plain ``openmm.HarmonicAngleForce``.

    Both forces are added to ``self._hybrid_system`` and registered in
    ``self._hybrid_system_forces``. No angles are added here; that is
    done elsewhere.

    Notes
    -----
    * User defined functions have been removed for now.
    * Neglected angle terms have been removed for now.
    """
    interpolated_expression = ''.join((
        '(K/2)*(theta-theta0)^2;',
        # linear interpolation of the spring constant
        'K = (1.0-lambda_angles)*K_1 + lambda_angles*K_2;',
        # linear interpolation of the equilibrium angle
        'theta0 = (1.0-lambda_angles)*theta0_1 + lambda_angles*theta0_2;',
    ))
    interpolated_angles = openmm.CustomAngleForce(interpolated_expression)

    # Registration order defines the layout of each angle's parameter
    # list: molecule1 equilibrium angle, molecule1 spring constant,
    # molecule2 equilibrium angle, molecule2 spring constant.
    for per_angle_name in ('theta0_1', 'K_1', 'theta0_2', 'K_2'):
        interpolated_angles.addPerAngleParameter(per_angle_name)
    interpolated_angles.addGlobalParameter('lambda_angles', 0.0)

    self._hybrid_system.addForce(interpolated_angles)
    self._hybrid_system_forces['core_angle_force'] = interpolated_angles

    # Environment/unique angle terms are never scaled.
    unscaled_angles = openmm.HarmonicAngleForce()
    self._hybrid_system.addForce(unscaled_angles)
    self._hybrid_system_forces['standard_angle_force'] = unscaled_angles
def _add_torsion_force_terms(self):
    """
    Create the two (still empty) torsion forces of the hybrid system.

    * ``custom_torsion_force`` : an ``openmm.CustomTorsionForce`` that
      mixes two periodic torsion energies, ``U1`` and ``U2``, weighted
      by ``(1-lambda_torsions)`` and ``lambda_torsions`` respectively.
    * ``unique_atom_torsion_force`` : a plain
      ``openmm.PeriodicTorsionForce`` for terms that are always on.

    Both forces are added to ``self._hybrid_system`` and registered in
    ``self._hybrid_system_forces``. No torsions are added here.

    Notes
    -----
    * User defined functions have been removed for now.
    * Options for add_custom_core_force (default True) and
      add_unique_atom_torsion_force (default True) have been removed for
      now.
    """
    interpolated_expression = ''.join((
        '(1-lambda_torsions)*U1 + lambda_torsions*U2;',
        'U1 = K1*(1+cos(periodicity1*theta-phase1));',
        'U2 = K2*(1+cos(periodicity2*theta-phase2));',
    ))
    interpolated_torsions = openmm.CustomTorsionForce(
        interpolated_expression)

    # Registration order defines the per-torsion parameter layout:
    # periodicity, phase and spring constant for molecule1, followed by
    # the same three quantities for molecule2.
    per_torsion_names = (
        'periodicity1', 'phase1', 'K1',
        'periodicity2', 'phase2', 'K2',
    )
    for per_torsion_name in per_torsion_names:
        interpolated_torsions.addPerTorsionParameter(per_torsion_name)
    interpolated_torsions.addGlobalParameter('lambda_torsions', 0.0)

    self._hybrid_system.addForce(interpolated_torsions)
    self._hybrid_system_forces['custom_torsion_force'] = interpolated_torsions

    # Torsion term for unique/environment atoms.
    always_on_torsions = openmm.PeriodicTorsionForce()
    self._hybrid_system.addForce(always_on_torsions)
    self._hybrid_system_forces['unique_atom_torsion_force'] = always_on_torsions
@staticmethod
def _nonbonded_custom(v2):
"""
Get a part of the nonbonded energy expression when there is no cutoff.
Parameters
----------
v2 : bool
Whether to use the softcore methods as defined by Gapsys et al.
JCTC 2012.
Returns
-------
sterics_energy_expression : str
The energy expression for U_sterics
electrostatics_energy_expression : str
The energy expression for electrostatics
TODO
----
* Move to a dictionary or equivalent.
"""
# Soft-core Lennard-Jones
if v2:
sterics_energy_expression = "U_sterics = select(step(r - r_LJ), 4*epsilon*x*(x-1.0), U_sterics_quad);"
sterics_energy_expression += "U_sterics_quad = Force*(((r - r_LJ)^2)/2 - (r - r_LJ)) + U_sterics_cut;"
sterics_energy_expression += "U_sterics_cut = 4*epsilon*((sigma/r_LJ)^6)*(((sigma/r_LJ)^6) - 1.0);"
sterics_energy_expression += "Force = -4*epsilon*((-12*sigma^12)/(r_LJ^13) + (6*sigma^6)/(r_LJ^7));"
sterics_energy_expression += "x = (sigma/r)^6;"
sterics_energy_expression += "r_LJ = softcore_alpha*((26/7)*(sigma^6)*lambda_sterics_deprecated)^(1/6);"
sterics_energy_expression += "lambda_sterics_deprecated = new_interaction*(1.0 - lambda_sterics_insert) + old_interaction*lambda_sterics_delete;"
else:
sterics_energy_expression = "U_sterics = 4*epsilon*x*(x-1.0); x = (sigma/reff_sterics)^6;"
return sterics_energy_expression
@staticmethod
def _nonbonded_custom_sterics_common():
"""
Get a custom sterics expression using amber softcore expression
Returns
-------
sterics_addition : str
The common softcore sterics energy expression
TODO
----
* Move to a dictionary or equivalent.
"""
# interpolation
sterics_addition = "epsilon = (1-lambda_sterics)*epsilonA + lambda_sterics*epsilonB;"
# effective softcore distance for sterics
sterics_addition += "reff_sterics = sigma*((softcore_alpha*lambda_alpha + (r/sigma)^6))^(1/6);"
sterics_addition += "sigma = (1-lambda_sterics)*sigmaA + lambda_sterics*sigmaB;"
sterics_addition += "lambda_alpha = new_interaction*(1-lambda_sterics_insert) + old_interaction*lambda_sterics_delete;"
sterics_addition += "lambda_sterics = core_interaction*lambda_sterics_core + new_interaction*lambda_sterics_insert + old_interaction*lambda_sterics_delete;"
sterics_addition += "core_interaction = delta(unique_old1+unique_old2+unique_new1+unique_new2);new_interaction = max(unique_new1, unique_new2);old_interaction = max(unique_old1, unique_old2);"
return sterics_addition
@staticmethod
def _nonbonded_custom_mixing_rules():
"""
Mixing rules for the custom nonbonded force.
Returns
-------
sterics_mixing_rules : str
The mixing expression for sterics
electrostatics_mixing_rules : str
The mixiing rules for electrostatics
TODO
----
* Move to a dictionary or equivalent.
"""
# Define mixing rules.
# mixing rule for epsilon
sterics_mixing_rules = "epsilonA = sqrt(epsilonA1*epsilonA2);"
# mixing rule for epsilon
sterics_mixing_rules += "epsilonB = sqrt(epsilonB1*epsilonB2);"
# mixing rule for sigma
sterics_mixing_rules += "sigmaA = 0.5*(sigmaA1 + sigmaA2);"
# mixing rule for sigma
sterics_mixing_rules += "sigmaB = 0.5*(sigmaB1 + sigmaB2);"
return sterics_mixing_rules
@staticmethod
def _translate_nonbonded_method_to_custom(standard_nonbonded_method):
    """
    Translate a ``NonbondedForce`` nonbonded method into the matching
    ``CustomNonbondedForce`` nonbonded method.

    `CutoffPeriodic`, `PME`, and `Ewald` all become `CutoffPeriodic`;
    `NoCutoff` becomes `NoCutoff`; `CutoffNonPeriodic` becomes
    `CutoffNonPeriodic`

    Parameters
    ----------
    standard_nonbonded_method : openmm.NonbondedForce.NonbondedMethod
      the nonbonded method of the standard force

    Returns
    -------
    custom_nonbonded_method : openmm.CustomNonbondedForce.NonbondedMethod
      the nonbonded method for the equivalent customnonbonded force

    Raises
    ------
    NotImplementedError
      If ``standard_nonbonded_method`` is none of the methods above.
    """
    standard = openmm.NonbondedForce
    custom = openmm.CustomNonbondedForce

    # Every periodic treatment collapses onto a periodic cutoff.
    periodic_methods = (standard.CutoffPeriodic, standard.PME,
                        standard.Ewald)
    if standard_nonbonded_method in periodic_methods:
        return custom.CutoffPeriodic
    if standard_nonbonded_method == standard.NoCutoff:
        return custom.NoCutoff
    if standard_nonbonded_method == standard.CutoffNonPeriodic:
        return custom.CutoffNonPeriodic

    raise NotImplementedError("This nonbonded method is not supported.")
def _add_nonbonded_force_terms(self):
    """
    Create the (still empty) nonbonded forces of the hybrid system.

    Two forces are created, added to ``self._hybrid_system`` and
    registered in ``self._hybrid_system_forces``:

    * ``standard_nonbonded_force`` : an ``openmm.NonbondedForce`` whose
      nonbonded method comes from ``self._nonbonded_method`` and whose
      cutoff, reaction-field dielectric or PME/Ewald settings,
      dispersion correction and switching function are copied from the
      old system's ``NonbondedForce``.
    * ``core_sterics_force`` : an ``openmm.CustomNonbondedForce``
      evaluating the softcore sterics expression assembled from the
      ``_nonbonded_custom*`` helpers.

    As with the other forces, no interactions are added here.

    Raises
    ------
    ValueError
      If ``self._nonbonded_method`` is not one of the handled
      ``openmm.NonbondedForce`` methods.

    Notes
    -----
    * User defined functions have been removed for now.
    * Argument `add_custom_sterics_force` (default True) has been removed
      for now.

    TODO
    ----
    * Move nonbonded_method defn here to avoid just setting it globally
      and polluting `self`.
    """
    # Regular nonbonded force for all interactions that are not changing.
    standard_nonbonded_force = openmm.NonbondedForce()
    self._hybrid_system.addForce(standard_nonbonded_force)
    self._hybrid_system_forces['standard_nonbonded_force'] = standard_nonbonded_force

    old_nonbonded_force = self._old_system_forces['NonbondedForce']
    r_cutoff = old_nonbonded_force.getCutoffDistance()

    # The leading sterics expression does not depend on the nonbonded
    # method, so it is built once up front.
    sterics_energy_expression = self._nonbonded_custom(self._softcore_LJ_v2)

    # Copy the method-specific settings onto the standard force.
    method = self._nonbonded_method
    reaction_field_methods = (openmm.NonbondedForce.CutoffPeriodic,
                              openmm.NonbondedForce.CutoffNonPeriodic)
    ewald_methods = (openmm.NonbondedForce.PME,
                     openmm.NonbondedForce.Ewald)
    if method in reaction_field_methods:
        standard_nonbonded_force.setReactionFieldDielectric(
            old_nonbonded_force.getReactionFieldDielectric())
        standard_nonbonded_force.setCutoffDistance(r_cutoff)
    elif method in ewald_methods:
        alpha_ewald, nx, ny, nz = old_nonbonded_force.getPMEParameters()
        delta = old_nonbonded_force.getEwaldErrorTolerance()
        standard_nonbonded_force.setPMEParameters(alpha_ewald, nx, ny, nz)
        standard_nonbonded_force.setEwaldErrorTolerance(delta)
        standard_nonbonded_force.setCutoffDistance(r_cutoff)
    elif method not in (openmm.NonbondedForce.NoCutoff,):
        # NoCutoff needs no extra settings; anything else is unsupported.
        raise ValueError(f"Nonbonded method {self._nonbonded_method} not supported")

    standard_nonbonded_force.setNonbondedMethod(self._nonbonded_method)

    # Assemble the full expression for the alchemically interpolated
    # sterics force.
    sterics_energy_expression += self._nonbonded_custom_sterics_common()
    sterics_mixing_rules = self._nonbonded_custom_mixing_rules()
    custom_nonbonded_method = self._translate_nonbonded_method_to_custom(
        self._nonbonded_method)
    total_sterics_energy = "".join(
        ("U_sterics;", sterics_energy_expression, sterics_mixing_rules))
    sterics_custom_nonbonded_force = openmm.CustomNonbondedForce(
        total_sterics_energy)

    # Match cutoff from non-custom NB forces
    sterics_custom_nonbonded_force.setCutoffDistance(r_cutoff)

    # The softcore alpha value depends on which softcore form is in use.
    softcore_alpha = (self._softcore_LJ_v2_alpha if self._softcore_LJ_v2
                      else self._softcore_alpha)
    sterics_custom_nonbonded_force.addGlobalParameter(
        "softcore_alpha", softcore_alpha)

    # Per-particle parameters, in registration order:
    #   sigmaA / epsilonA : Lennard-Jones sigma / epsilon, initial
    #   sigmaB / epsilonB : Lennard-Jones sigma / epsilon, final
    #   unique_old        : 1 = hybrid old atom, 0 otherwise
    #   unique_new        : 1 = hybrid new atom, 0 otherwise
    per_particle_names = ("sigmaA", "epsilonA", "sigmaB", "epsilonB",
                          "unique_old", "unique_new")
    for per_particle_name in per_particle_names:
        sterics_custom_nonbonded_force.addPerParticleParameter(
            per_particle_name)

    # Alchemical control parameters, all starting at 0.0.
    lambda_names = ("lambda_sterics_core", "lambda_electrostatics_core",
                    "lambda_sterics_insert", "lambda_sterics_delete")
    for lambda_name in lambda_names:
        sterics_custom_nonbonded_force.addGlobalParameter(lambda_name, 0.0)

    sterics_custom_nonbonded_force.setNonbondedMethod(
        custom_nonbonded_method)
    self._hybrid_system.addForce(sterics_custom_nonbonded_force)
    self._hybrid_system_forces['core_sterics_force'] = sterics_custom_nonbonded_force

    # Set the use of dispersion correction to be the same between the new
    # nonbonded force and the old one. The custom force only receives a
    # long range correction when self._use_dispersion_correction is set.
    registered_standard_force = self._hybrid_system_forces['standard_nonbonded_force']
    if old_nonbonded_force.getUseDispersionCorrection():
        registered_standard_force.setUseDispersionCorrection(True)
        if self._use_dispersion_correction:
            sterics_custom_nonbonded_force.setUseLongRangeCorrection(True)
    else:
        registered_standard_force.setUseDispersionCorrection(False)

    # Mirror the old system's switching function on both forces.
    if old_nonbonded_force.getUseSwitchingFunction():
        switching_distance = old_nonbonded_force.getSwitchingDistance()
        for force in (standard_nonbonded_force,
                      sterics_custom_nonbonded_force):
            force.setUseSwitchingFunction(True)
            force.setSwitchingDistance(switching_distance)
    else:
        standard_nonbonded_force.setUseSwitchingFunction(False)
        sterics_custom_nonbonded_force.setUseSwitchingFunction(False)
@staticmethod
def _find_bond_parameters(bond_force, index1, index2):
"""
This is a convenience function to find bond parameters in another
system given the two indices.
Parameters
----------
bond_force : openmm.HarmonicBondForce
The bond force where the parameters should be found
index1 : int
Index1 (order does not matter) of the bond atoms
index2 : int
Index2 (order does not matter) of the bond atoms
Returns
-------
bond_parameters : list
List of relevant bond parameters
"""
index_set = {index1, index2}
# Loop through all the bonds:
for bond_index in range(bond_force.getNumBonds()):
parms = bond_force.getBondParameters(bond_index)
if index_set == {parms[0], parms[1]}:
return parms
return []
def _handle_harmonic_bonds(self):
"""
This method adds the appropriate interaction for all bonds in the
hybrid system. The scheme used is:
1) If the two atoms are both in the core, then we add to the
CustomBondForce and interpolate between the two parameters
2) If one of the atoms is in core and the other is environment, we
have to assert that the bond parameters do not change between the
old and the new system; then, the parameters are added to the
regular bond force
3) Otherwise, we add the bond to a regular bond force.
Notes
-----
* Bond softening logic has been removed for now.
"""
old_system_bond_force = self._old_system_forces['HarmonicBondForce']
new_system_bond_force = self._new_system_forces['HarmonicBondForce']
# First, loop through the old system bond forces and add relevant terms
for bond_index in range(old_system_bond_force.getNumBonds()):
# Get each set of bond parameters
[index1_old, index2_old, r0_old, k_old] = old_system_bond_force.getBondParameters(bond_index)
# Map the indices to the hybrid system, for which our atom classes
# are defined.
index1_hybrid = self._old_to_hybrid_map[index1_old]
index2_hybrid = self._old_to_hybrid_map[index2_old]
index_set = {index1_hybrid, index2_hybrid}
# Now check if it is a subset of the core atoms (that is, both
# atoms are in the core)
# If it is, we need to find the parameters in the old system so
# that we can interpolate
if index_set.issubset(self._atom_classes['core_atoms']):
index1_new = self._old_to_new_map[index1_old]
index2_new = self._old_to_new_map[index2_old]
new_bond_parameters = self._find_bond_parameters(
new_system_bond_force, index1_new, index2_new)
if not new_bond_parameters:
r0_new = r0_old
k_new = 0.0*unit.kilojoule_per_mole/unit.angstrom**2
else:
# TODO - why is this being recalculated?
[index1, index2, r0_new, k_new] = self._find_bond_parameters(
new_system_bond_force, index1_new, index2_new)
self._hybrid_system_forces['core_bond_force'].addBond(
index1_hybrid, index2_hybrid,
[r0_old, k_old, r0_new, k_new])
# Check if the index set is a subset of anything besides
# environment (in the case of environment, we just add the bond to
# the regular bond force)
# that would mean that this bond is core-unique_old or
# unique_old-unique_old
# NOTE - These are currently all the same because we don't soften
# TODO - work these out somewhere else, this is terribly difficult
# to understand logic.
elif (index_set.issubset(self._atom_classes['unique_old_atoms']) or
(len(index_set.intersection(self._atom_classes['unique_old_atoms'])) == 1
and len(index_set.intersection(self._atom_classes['core_atoms'])) == 1)):
# We can just add it to the regular bond force.
self._hybrid_system_forces['standard_bond_force'].addBond(
index1_hybrid, index2_hybrid, r0_old, k_old)
elif (len(index_set.intersection(self._atom_classes['environment_atoms'])) == 1 and
len(index_set.intersection(self._atom_classes['core_atoms'])) == 1):
self._hybrid_system_forces['standard_bond_force'].addBond(
index1_hybrid, index2_hybrid, r0_old, k_old)
# Otherwise, we just add the same parameters as those in the old
# system (these are environment atoms, and the parameters are the
# same)
elif index_set.issubset(self._atom_classes['environment_atoms']):
self._hybrid_system_forces['standard_bond_force'].addBond(
index1_hybrid, index2_hybrid, r0_old, k_old)
else:
errmsg = (f"hybrid index set {index_set} does not fit into a "
"canonical atom type")
raise ValueError(errmsg)
# Now loop through the new system to get the interactions that are
# unique to it.
for bond_index in range(new_system_bond_force.getNumBonds()):
# Get each set of bond parameters
[index1_new, index2_new, r0_new, k_new] = new_system_bond_force.getBondParameters(bond_index)
# Convert indices to hybrid, since that is how we represent atom classes:
index1_hybrid = self._new_to_hybrid_map[index1_new]
index2_hybrid = self._new_to_hybrid_map[index2_new]
index_set = {index1_hybrid, index2_hybrid}
# If the intersection of this set and unique new atoms contains
# anything, the bond is unique to the new system and must be added
# all other bonds in the new system have been accounted for already
# NOTE - These are mostly all the same because we don't soften
if (len(index_set.intersection(self._atom_classes['unique_new_atoms'])) == 2 or
(len(index_set.intersection(self._atom_classes['unique_new_atoms'])) == 1 and
len(index_set.intersection(self._atom_classes['core_atoms'])) == 1)):
# If we aren't softening bonds, then just add it to the standard bond force
self._hybrid_system_forces['standard_bond_force'].addBond(
index1_hybrid, index2_hybrid, r0_new, k_new)
# If the bond is in the core, it has probably already been added
# in the above loop. However, there are some circumstances
# where it was not (closing a ring). In that case, the bond has
# not been added and should be added here.
# This has some peculiarities to be discussed...
# TODO - Work out what the above peculiarities are...
elif index_set.issubset(self._atom_classes['core_atoms']):
if not self._find_bond_parameters(
self._hybrid_system_forces['core_bond_force'],
index1_hybrid, index2_hybrid):
r0_old = r0_new
k_old = 0.0*unit.kilojoule_per_mole/unit.angstrom**2
self._hybrid_system_forces['core_bond_force'].addBond(
index1_hybrid, index2_hybrid,
[r0_old, k_old, r0_new, k_new])
elif index_set.issubset(self._atom_classes['environment_atoms']):
# Already been added
pass
elif (len(index_set.intersection(self._atom_classes['environment_atoms'])) == 1 and
len(index_set.intersection(self._atom_classes['core_atoms'])) == 1):
pass
else:
errmsg = (f"hybrid index set {index_set} does not fit into a "
"canonical atom type")
raise ValueError(errmsg)
@staticmethod
def _find_angle_parameters(angle_force, indices):
"""
Convenience function to find the angle parameters corresponding to a
particular set of indices
Parameters
----------
angle_force : openmm.HarmonicAngleForce
The force where the angle of interest may be found.
indices : list of int
The indices (any order) of the angle atoms
Returns
-------
angle_params : list
list of angle parameters
"""
indices_reversed = indices[::-1]
# Now loop through and try to find the angle:
for angle_index in range(angle_force.getNumAngles()):
angle_params = angle_force.getAngleParameters(angle_index)
# Get a set representing the angle indices
angle_param_indices = angle_params[:3]
if (indices == angle_param_indices or
indices_reversed == angle_param_indices):
return angle_params
return [] # Return empty if no matching angle found
def _handle_harmonic_angles(self):
"""
This method adds the appropriate interaction for all angles in the
hybrid system. The scheme used, as with bonds, is:
1) If the three atoms are all in the core, then we add to the
CustomAngleForce and interpolate between the two parameters
2) If the three atoms contain at least one unique new, check if the
angle is in the neglected new list, and if so, interpolate from
K_1 = 0; else, if the three atoms contain at least one unique old,
check if the angle is in the neglected old list, and if so,
interpolate from K_2 = 0.
3) If the angle contains at least one environment and at least one
core atom, assert there are no unique new atoms and that the angle
terms are preserved between the new and the old system. Then add to
the standard angle force.
4) Otherwise, we add the angle to a regular angle force since it is
environment.
Notes
-----
* Removed softening and neglected angle functionality
"""
old_system_angle_force = self._old_system_forces['HarmonicAngleForce']
new_system_angle_force = self._new_system_forces['HarmonicAngleForce']
# First, loop through all the angles in the old system to determine
# what to do with them. We will only use the
# custom angle force if all atoms are part of "core." Otherwise, they
# are either unique to one system or never change.
for angle_index in range(old_system_angle_force.getNumAngles()):
old_angle_parameters = old_system_angle_force.getAngleParameters(
angle_index)
# Get the indices in the hybrid system
hybrid_index_list = [
self._old_to_hybrid_map[old_atomid] for old_atomid in old_angle_parameters[:3]
]
hybrid_index_set = set(hybrid_index_list)
# If all atoms are in the core, we'll need to find the
# corresponding parameters in the old system and interpolate
if hybrid_index_set.issubset(self._atom_classes['core_atoms']):
# Get the new indices so we can get the new angle parameters
new_indices = [
self._old_to_new_map[old_atomid] for old_atomid in old_angle_parameters[:3]
]
new_angle_parameters = self._find_angle_parameters(
new_system_angle_force, new_indices
)
if not new_angle_parameters:
new_angle_parameters = [
0, 0, 0, old_angle_parameters[3],
0.0*unit.kilojoule_per_mole/unit.radian**2
]
# Add to the hybrid force:
# the parameters at indices 3 and 4 represent theta0 and k,
# respectively.
hybrid_force_parameters = [
old_angle_parameters[3], old_angle_parameters[4],
new_angle_parameters[3], new_angle_parameters[4]
]
self._hybrid_system_forces['core_angle_force'].addAngle(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], hybrid_force_parameters
)
# Check if the atoms are neither all core nor all environment,
# which would mean they involve unique old interactions
elif not hybrid_index_set.issubset(
self._atom_classes['environment_atoms']):
# if there is an environment atom
if hybrid_index_set.intersection(
self._atom_classes['environment_atoms']):
if hybrid_index_set.intersection(
self._atom_classes['unique_old_atoms']):
errmsg = "we disallow unique-environment terms"
raise ValueError(errmsg)
self._hybrid_system_forces['standard_angle_force'].addAngle(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], old_angle_parameters[3],
old_angle_parameters[4]
)
else:
# There are no env atoms, so we can treat this term
# appropriately
# We don't soften so just add this to the standard angle
# force
self._hybrid_system_forces['standard_angle_force'].addAngle(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], old_angle_parameters[3],
old_angle_parameters[4]
)
# Otherwise, only environment atoms are in this interaction, so
# add it to the standard angle force
elif hybrid_index_set.issubset(
self._atom_classes['environment_atoms']):
self._hybrid_system_forces['standard_angle_force'].addAngle(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], old_angle_parameters[3],
old_angle_parameters[4]
)
else:
errmsg = (f"handle_harmonic_angles: angle_index {angle_index} "
"does not fit a canonical form.")
raise ValueError(errmsg)
# Finally, loop through the new system force to add any unique new
# angles
for angle_index in range(new_system_angle_force.getNumAngles()):
new_angle_parameters = new_system_angle_force.getAngleParameters(
angle_index)
# Get the indices in the hybrid system
hybrid_index_list = [
self._new_to_hybrid_map[new_atomid] for new_atomid in new_angle_parameters[:3]
]
hybrid_index_set = set(hybrid_index_list)
# If the intersection of this hybrid set with the unique new atoms
# is nonempty, it must be added:
# TODO - there's a ton of len > 0 on sets, empty sets == False,
# so we can simplify this logic.
if len(hybrid_index_set.intersection(
self._atom_classes['unique_new_atoms'])) > 0:
if hybrid_index_set.intersection(
self._atom_classes['environment_atoms']):
errmsg = ("we disallow angle terms with unique new and "
"environment atoms")
raise ValueError(errmsg)
# Not softening just add to the nonalchemical force
self._hybrid_system_forces['standard_angle_force'].addAngle(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], new_angle_parameters[3],
new_angle_parameters[4]
)
elif hybrid_index_set.issubset(self._atom_classes['core_atoms']):
if not self._find_angle_parameters(self._hybrid_system_forces['core_angle_force'],
hybrid_index_list):
hybrid_force_parameters = [
new_angle_parameters[3],
0.0*unit.kilojoule_per_mole/unit.radian**2,
new_angle_parameters[3], new_angle_parameters[4]
]
self._hybrid_system_forces['core_angle_force'].addAngle(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], hybrid_force_parameters
)
elif hybrid_index_set.issubset(self._atom_classes['environment_atoms']):
# We have already added the appropriate environmental atom
# terms
pass
elif hybrid_index_set.intersection(self._atom_classes['environment_atoms']):
if hybrid_index_set.intersection(self._atom_classes['unique_new_atoms']):
errmsg = ("we disallow angle terms with unique new and "
"environment atoms")
raise ValueError(errmsg)
else:
errmsg = (f"hybrid index list {hybrid_index_list} does not "
"fit into a canonical atom set")
raise ValueError(errmsg)
@staticmethod
def _find_torsion_parameters(torsion_force, indices):
"""
Convenience function to find the torsion parameters corresponding to a
particular set of indices.
Parameters
----------
torsion_force : openmm.PeriodicTorsionForce
torsion force where the torsion of interest may be found
indices : list of int
The indices of the atoms of the torsion
Returns
-------
torsion_parameters : list
torsion parameters
"""
indices_reversed = indices[::-1]
torsion_params_list = list()
# Now loop through and try to find the torsion:
for torsion_idx in range(torsion_force.getNumTorsions()):
torsion_params = torsion_force.getTorsionParameters(torsion_idx)
# Get a set representing the torsion indices:
torsion_param_indices = torsion_params[:4]
if (indices == torsion_param_indices or
indices_reversed == torsion_param_indices):
torsion_params_list.append(torsion_params)
return torsion_params_list
def _handle_periodic_torsion_force(self):
"""
Handle the torsions defined in the new and old systems as such:
1. old system torsions will enter the ``custom_torsion_force`` if they
do not contain ``unique_old_atoms`` and will interpolate from ``on``
to ``off`` from ``lambda_torsions`` = 0 to 1, respectively.
2. new system torsions will enter the ``custom_torsion_force`` if they
do not contain ``unique_new_atoms`` and will interpolate from
``off`` to ``on`` from ``lambda_torsions`` = 0 to 1, respectively.
3. old *and* new system torsions will enter the
``unique_atom_torsion_force`` (``standard_torsion_force``) and will
*not* be interpolated.
Notes
-----
* Torsion flattening logic has been removed for now.
"""
old_system_torsion_force = self._old_system_forces['PeriodicTorsionForce']
new_system_torsion_force = self._new_system_forces['PeriodicTorsionForce']
auxiliary_custom_torsion_force = []
old_custom_torsions_to_standard = []
# We need to keep track of what torsions we added so that we do not
# double count
# added_torsions = []
# TODO: Commented out since this actually isn't being done anywhere?
# Is it necessary? Should we add this logic back in?
for torsion_index in range(old_system_torsion_force.getNumTorsions()):
torsion_parameters = old_system_torsion_force.getTorsionParameters(
torsion_index)
# Get the indices in the hybrid system
hybrid_index_list = [
self._old_to_hybrid_map[old_index] for old_index in torsion_parameters[:4]
]
hybrid_index_set = set(hybrid_index_list)
# If all atoms are in the core, we'll need to find the
# corresponding parameters in the old system and interpolate
if hybrid_index_set.intersection(self._atom_classes['unique_old_atoms']):
# Then it goes to a standard force...
self._hybrid_system_forces['unique_atom_torsion_force'].addTorsion(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], hybrid_index_list[3],
torsion_parameters[4], torsion_parameters[5],
torsion_parameters[6]
)
else:
# It is a core-only term, an environment-only term, or a
# core/env term; in any case, it goes to the core torsion_force
# TODO - why are we even adding the 0.0, 0.0, 0.0 section?
hybrid_force_parameters = [
torsion_parameters[4], torsion_parameters[5],
torsion_parameters[6], 0.0, 0.0, 0.0
]
auxiliary_custom_torsion_force.append(
[hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], hybrid_index_list[3],
hybrid_force_parameters[:3]]
)
for torsion_index in range(new_system_torsion_force.getNumTorsions()):
torsion_parameters = new_system_torsion_force.getTorsionParameters(torsion_index)
# Get the indices in the hybrid system:
hybrid_index_list = [
self._new_to_hybrid_map[new_index] for new_index in torsion_parameters[:4]]
hybrid_index_set = set(hybrid_index_list)
if hybrid_index_set.intersection(self._atom_classes['unique_new_atoms']):
# Then it goes to the custom torsion force (scaled to zero)
self._hybrid_system_forces['unique_atom_torsion_force'].addTorsion(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], hybrid_index_list[3],
torsion_parameters[4], torsion_parameters[5],
torsion_parameters[6]
)
else:
hybrid_force_parameters = [
0.0, 0.0, 0.0, torsion_parameters[4],
torsion_parameters[5], torsion_parameters[6]]
# Check to see if this term is in the olds...
term = [hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], hybrid_index_list[3],
hybrid_force_parameters[3:]]
if term in auxiliary_custom_torsion_force:
# Then this terms has to go to standard and be deleted...
old_index = auxiliary_custom_torsion_force.index(term)
old_custom_torsions_to_standard.append(old_index)
self._hybrid_system_forces['unique_atom_torsion_force'].addTorsion(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], hybrid_index_list[3],
torsion_parameters[4], torsion_parameters[5],
torsion_parameters[6]
)
else:
# Then this term has to go to the core force...
self._hybrid_system_forces['custom_torsion_force'].addTorsion(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], hybrid_index_list[3],
hybrid_force_parameters
)
# Now we have to loop through the aux custom torsion force
for index in [q for q in range(len(auxiliary_custom_torsion_force))
if q not in old_custom_torsions_to_standard]:
terms = auxiliary_custom_torsion_force[index]
hybrid_index_list = terms[:4]
hybrid_force_parameters = terms[4] + [0., 0., 0.]
self._hybrid_system_forces['custom_torsion_force'].addTorsion(
hybrid_index_list[0], hybrid_index_list[1],
hybrid_index_list[2], hybrid_index_list[3],
hybrid_force_parameters
)
def _handle_nonbonded(self):
"""
Handle the nonbonded interactions defined in the new and old systems.
TODO
----
* Expand this docstring to explain the logic.
* A lot of this logic is duplicated, probably turn it into a couple of
functions.
"""
def _check_indices(idx1, idx2):
if idx1 != idx2:
errmsg = ("Attempting to add incorrect particle to hybrid "
"system")
raise ValueError(errmsg)
old_system_nonbonded_force = self._old_system_forces['NonbondedForce']
new_system_nonbonded_force = self._new_system_forces['NonbondedForce']
hybrid_to_old_map = self._hybrid_to_old_map
hybrid_to_new_map = self._hybrid_to_new_map
# Define new global parameters for NonbondedForce
self._hybrid_system_forces['standard_nonbonded_force'].addGlobalParameter('lambda_electrostatics_core', 0.0)
self._hybrid_system_forces['standard_nonbonded_force'].addGlobalParameter('lambda_sterics_core', 0.0)
self._hybrid_system_forces['standard_nonbonded_force'].addGlobalParameter("lambda_electrostatics_delete", 0.0)
self._hybrid_system_forces['standard_nonbonded_force'].addGlobalParameter("lambda_electrostatics_insert", 0.0)
# We have to loop through the particles in the system, because
# nonbonded force does not accept index
for particle_index in range(self._hybrid_system.getNumParticles()):
if particle_index in self._atom_classes['unique_old_atoms']:
# Get the parameters in the old system
old_index = hybrid_to_old_map[particle_index]
[charge, sigma, epsilon] = old_system_nonbonded_force.getParticleParameters(old_index)
# Add the particle to the hybrid custom sterics and
# electrostatics.
# turning off sterics in forward direction
check_index = self._hybrid_system_forces['core_sterics_force'].addParticle(
[sigma, epsilon, sigma, 0.0*epsilon, 1, 0]
)
_check_indices(particle_index, check_index)
# Add particle to the regular nonbonded force, but
# Lennard-Jones will be handled by CustomNonbondedForce
check_index = self._hybrid_system_forces['standard_nonbonded_force'].addParticle(
charge, sigma, 0.0*epsilon
)
_check_indices(particle_index, check_index)
# Charge will be turned off at
# lambda_electrostatics_delete = 0, on at
# lambda_electrostatics_delete = 1; kill charge with
# lambda_electrostatics_delete = 0 --> 1
self._hybrid_system_forces['standard_nonbonded_force'].addParticleParameterOffset(
'lambda_electrostatics_delete', particle_index,
-charge, 0*sigma, 0*epsilon
)
elif particle_index in self._atom_classes['unique_new_atoms']:
# Get the parameters in the new system
new_index = hybrid_to_new_map[particle_index]
[charge, sigma, epsilon] = new_system_nonbonded_force.getParticleParameters(new_index)
# Add the particle to the hybrid custom sterics and electrostatics
# turning on sterics in forward direction
check_index = self._hybrid_system_forces['core_sterics_force'].addParticle(
[sigma, 0.0*epsilon, sigma, epsilon, 0, 1]
)
_check_indices(particle_index, check_index)
# Add particle to the regular nonbonded force, but
# Lennard-Jones will be handled by CustomNonbondedForce
check_index = self._hybrid_system_forces['standard_nonbonded_force'].addParticle(
0.0, sigma, 0.0
) # charge starts at zero
_check_indices(particle_index, check_index)
# Charge will be turned off at lambda_electrostatics_insert = 0
# on at lambda_electrostatics_insert = 1;
# add charge with lambda_electrostatics_insert = 0 --> 1
self._hybrid_system_forces['standard_nonbonded_force'].addParticleParameterOffset(
'lambda_electrostatics_insert', particle_index,
+charge, 0, 0
)
elif particle_index in self._atom_classes['core_atoms']:
# Get the parameters in the new and old systems:
old_index = hybrid_to_old_map[particle_index]
[charge_old, sigma_old, epsilon_old] = old_system_nonbonded_force.getParticleParameters(old_index)
new_index = hybrid_to_new_map[particle_index]
[charge_new, sigma_new, epsilon_new] = new_system_nonbonded_force.getParticleParameters(new_index)
# Add the particle to the custom forces, interpolating between
# the two parameters; add steric params and zero electrostatics
# to core_sterics per usual
check_index = self._hybrid_system_forces['core_sterics_force'].addParticle(
[sigma_old, epsilon_old, sigma_new, epsilon_new, 0, 0])
_check_indices(particle_index, check_index)
# Still add the particle to the regular nonbonded force, but
# with zeroed out parameters; add old charge to
# standard_nonbonded and zero sterics
check_index = self._hybrid_system_forces['standard_nonbonded_force'].addParticle(
# this term is off due to epsilon = 0, but just set sigma to the initial value to not confuse things
charge_old, sigma_old, 0.0)
_check_indices(particle_index, check_index)
# Charge is charge_old at lambda_electrostatics = 0,
# charge_new at lambda_electrostatics = 1
# TODO: We could also interpolate the Lennard-Jones here
# instead of core_sterics force so that core_sterics_force
# could just be softcore.
# Interpolate between old and new charge with
# lambda_electrostatics core make sure to keep sterics off
self._hybrid_system_forces['standard_nonbonded_force'].addParticleParameterOffset(
'lambda_electrostatics_core', particle_index,
(charge_new - charge_old), 0, 0
)
# Otherwise, the particle is in the environment
else:
# The parameters will be the same in new and old system, so
# just take the old parameters
old_index = hybrid_to_old_map[particle_index]
[charge, sigma, epsilon] = old_system_nonbonded_force.getParticleParameters(old_index)
# Add the particle to the hybrid custom sterics, but they dont
# change; electrostatics are ignored
self._hybrid_system_forces['core_sterics_force'].addParticle(
[sigma, epsilon, sigma, epsilon, 0, 0]
)
# Add the environment atoms to the regular nonbonded force as
# well: should we be adding steric terms here, too?
self._hybrid_system_forces['standard_nonbonded_force'].addParticle(
charge, sigma, epsilon
)
# Now loop pairwise through (unique_old, unique_new) and add exceptions
# so that they never interact electrostatically
# (place into Nonbonded Force)
unique_old_atoms = self._atom_classes['unique_old_atoms']
unique_new_atoms = self._atom_classes['unique_new_atoms']
for old in unique_old_atoms:
for new in unique_new_atoms:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
old, new, 0.0*unit.elementary_charge**2,
1.0*unit.nanometers, 0.0*unit.kilojoules_per_mole)
# This is only necessary to avoid the 'All forces must have
# identical exclusions' rule
self._hybrid_system_forces['core_sterics_force'].addExclusion(old, new)
self._handle_interaction_groups()
self._handle_hybrid_exceptions()
self._handle_original_exceptions()
def _handle_interaction_groups(self):
"""
Create the appropriate interaction groups for the custom nonbonded
forces. The groups are:
1) Unique-old - core
2) Unique-old - environment
3) Unique-new - core
4) Unique-new - environment
5) Core - environment
6) Core - core
Unique-old and Unique new are prevented from interacting this way,
and intra-unique interactions occur in an unmodified nonbonded force.
Must be called after particles are added to the Nonbonded forces
TODO: we should also be adding the following interaction groups...
7) Unique-new - Unique-new
8) Unique-old - Unique-old
"""
# Get the force objects for convenience:
sterics_custom_force = self._hybrid_system_forces['core_sterics_force']
# Also prepare the atom classes
core_atoms = self._atom_classes['core_atoms']
unique_old_atoms = self._atom_classes['unique_old_atoms']
unique_new_atoms = self._atom_classes['unique_new_atoms']
environment_atoms = self._atom_classes['environment_atoms']
sterics_custom_force.addInteractionGroup(unique_old_atoms, core_atoms)
sterics_custom_force.addInteractionGroup(unique_old_atoms,
environment_atoms)
sterics_custom_force.addInteractionGroup(unique_new_atoms,
core_atoms)
sterics_custom_force.addInteractionGroup(unique_new_atoms,
environment_atoms)
sterics_custom_force.addInteractionGroup(core_atoms, environment_atoms)
sterics_custom_force.addInteractionGroup(core_atoms, core_atoms)
sterics_custom_force.addInteractionGroup(unique_new_atoms,
unique_new_atoms)
sterics_custom_force.addInteractionGroup(unique_old_atoms,
unique_old_atoms)
def _handle_hybrid_exceptions(self):
"""
Instead of excluding interactions that shouldn't occur, we provide
exceptions for interactions that were zeroed out but should occur.
"""
# TODO - are these actually used anywhere? Flake8 says no
old_system_nonbonded_force = self._old_system_forces['NonbondedForce']
new_system_nonbonded_force = self._new_system_forces['NonbondedForce']
# Prepare the atom classes
unique_old_atoms = self._atom_classes['unique_old_atoms']
unique_new_atoms = self._atom_classes['unique_new_atoms']
# Get the list of interaction pairs for which we need to set exceptions
unique_old_pairs = list(itertools.combinations(unique_old_atoms, 2))
unique_new_pairs = list(itertools.combinations(unique_new_atoms, 2))
# Add back the interactions of the old unique atoms, unless there are
# exceptions
for atom_pair in unique_old_pairs:
# Since the pairs are indexed in the dictionary by the old system
# indices, we need to convert
old_index_atom_pair = (self._hybrid_to_old_map[atom_pair[0]],
self._hybrid_to_old_map[atom_pair[1]])
# Now we check if the pair is in the exception dictionary
if old_index_atom_pair in self._old_system_exceptions:
[chargeProd, sigma, epsilon] = self._old_system_exceptions[old_index_atom_pair]
# if we are interpolating 1,4 exceptions then we have to
if self._interpolate_14s:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
atom_pair[0], atom_pair[1], chargeProd*0.0,
sigma, epsilon*0.0
)
else:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
atom_pair[0], atom_pair[1], chargeProd, sigma, epsilon
)
# Add exclusion to ensure exceptions are consistent
self._hybrid_system_forces['core_sterics_force'].addExclusion(
atom_pair[0], atom_pair[1]
)
# Check if the pair is in the reverse order and use that if so
elif old_index_atom_pair[::-1] in self._old_system_exceptions:
[chargeProd, sigma, epsilon] = self._old_system_exceptions[old_index_atom_pair[::-1]]
# If we are interpolating 1,4 exceptions then we have to
if self._interpolate_14s:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
atom_pair[0], atom_pair[1], chargeProd*0.0,
sigma, epsilon*0.0
)
else:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
atom_pair[0], atom_pair[1], chargeProd, sigma, epsilon)
# Add exclusion to ensure exceptions are consistent
self._hybrid_system_forces['core_sterics_force'].addExclusion(
atom_pair[0], atom_pair[1])
# TODO: work out why there's a bunch of commented out code here
# Excerpt:
# If it's not handled by an exception in the original system, we
# just add the regular parameters as an exception
# TODO: this implies that the old-old nonbonded interactions (those
# which are not exceptions) are always self-interacting throughout
# lambda protocol...
# Add back the interactions of the new unique atoms, unless there are
# exceptions
for atom_pair in unique_new_pairs:
# Since the pairs are indexed in the dictionary by the new system
# indices, we need to convert
new_index_atom_pair = (self._hybrid_to_new_map[atom_pair[0]],
self._hybrid_to_new_map[atom_pair[1]])
# Now we check if the pair is in the exception dictionary
if new_index_atom_pair in self._new_system_exceptions:
[chargeProd, sigma, epsilon] = self._new_system_exceptions[new_index_atom_pair]
if self._interpolate_14s:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
atom_pair[0], atom_pair[1], chargeProd*0.0,
sigma, epsilon*0.0
)
else:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
atom_pair[0], atom_pair[1], chargeProd, sigma, epsilon
)
self._hybrid_system_forces['core_sterics_force'].addExclusion(
atom_pair[0], atom_pair[1]
)
# Check if the pair is present in the reverse order and use that if so
elif new_index_atom_pair[::-1] in self._new_system_exceptions:
[chargeProd, sigma, epsilon] = self._new_system_exceptions[new_index_atom_pair[::-1]]
if self._interpolate_14s:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
atom_pair[0], atom_pair[1], chargeProd*0.0,
sigma, epsilon*0.0
)
else:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
atom_pair[0], atom_pair[1], chargeProd, sigma, epsilon
)
self._hybrid_system_forces['core_sterics_force'].addExclusion(
atom_pair[0], atom_pair[1]
)
# TODO: work out why there's a bunch of commented out code here
# If it's not handled by an exception in the original system, we
# just add the regular parameters as an exception
@staticmethod
def _find_exception(force, index1, index2):
"""
Find the exception that corresponds to the given indices in the given
system
Parameters
----------
force : openmm.NonbondedForce object
System containing the exceptions
index1 : int
The index of the first atom (order is unimportant)
index2 : int
The index of the second atom (order is unimportant)
Returns
-------
exception_parameters : list
List of exception parameters
"""
index_set = {index1, index2}
# Loop through the exceptions and try to find one matching the criteria
for exception_idx in range(force.getNumExceptions()):
exception_parameters = force.getExceptionParameters(exception_idx)
if index_set==set(exception_parameters[:2]):
return exception_parameters
return []
def _handle_original_exceptions(self):
"""
This method ensures that exceptions present in the original systems are
present in the hybrid appropriately.
"""
# Get what we need to find the exceptions from the new and old systems:
old_system_nonbonded_force = self._old_system_forces['NonbondedForce']
new_system_nonbonded_force = self._new_system_forces['NonbondedForce']
hybrid_to_old_map = self._hybrid_to_old_map
hybrid_to_new_map = self._hybrid_to_new_map
# First, loop through the old system's exceptions and add them to the
# hybrid appropriately:
for exception_pair, exception_parameters in self._old_system_exceptions.items():
[index1_old, index2_old] = exception_pair
[chargeProd_old, sigma_old, epsilon_old] = exception_parameters
# Get hybrid indices:
index1_hybrid = self._old_to_hybrid_map[index1_old]
index2_hybrid = self._old_to_hybrid_map[index2_old]
index_set = {index1_hybrid, index2_hybrid}
# In this case, the interaction is only covered by the regular
# nonbonded force, and as such will be copied to that force
# In the unique-old case, it is handled elsewhere due to internal
# peculiarities regarding exceptions
if index_set.issubset(self._atom_classes['environment_atoms']):
self._hybrid_system_forces['standard_nonbonded_force'].addException(
index1_hybrid, index2_hybrid, chargeProd_old,
sigma_old, epsilon_old
)
self._hybrid_system_forces['core_sterics_force'].addExclusion(
index1_hybrid, index2_hybrid
)
# We have already handled unique old - unique old exceptions
elif len(index_set.intersection(self._atom_classes['unique_old_atoms'])) == 2:
continue
# Otherwise, check if one of the atoms in the set is in the
# unique_old_group and the other is not:
elif len(index_set.intersection(self._atom_classes['unique_old_atoms'])) == 1:
if self._interpolate_14s:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
index1_hybrid, index2_hybrid, chargeProd_old*0.0,
sigma_old, epsilon_old*0.0
)
else:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
index1_hybrid, index2_hybrid, chargeProd_old,
sigma_old, epsilon_old
)
self._hybrid_system_forces['core_sterics_force'].addExclusion(
index1_hybrid, index2_hybrid
)
# If the exception particles are neither solely old unique, solely
# environment, nor contain any unique old atoms, they are either
# core/environment or core/core
# In this case, we need to get the parameters from the exception in
# the other (new) system, and interpolate between the two
else:
# First get the new indices.
index1_new = hybrid_to_new_map[index1_hybrid]
index2_new = hybrid_to_new_map[index2_hybrid]
# Get the exception parameters:
new_exception_parms= self._find_exception(
new_system_nonbonded_force,
index1_new, index2_new)
# If there's no new exception, then we should just set the
# exception parameters to be the nonbonded parameters
if not new_exception_parms:
[charge1_new, sigma1_new, epsilon1_new] = new_system_nonbonded_force.getParticleParameters(index1_new)
[charge2_new, sigma2_new, epsilon2_new] = new_system_nonbonded_force.getParticleParameters(index2_new)
chargeProd_new = charge1_new * charge2_new
sigma_new = 0.5 * (sigma1_new + sigma2_new)
epsilon_new = unit.sqrt(epsilon1_new*epsilon2_new)
else:
[index1_new, index2_new, chargeProd_new, sigma_new, epsilon_new] = new_exception_parms
# Interpolate between old and new
exception_index = self._hybrid_system_forces['standard_nonbonded_force'].addException(
index1_hybrid, index2_hybrid, chargeProd_old,
sigma_old, epsilon_old
)
self._hybrid_system_forces['standard_nonbonded_force'].addExceptionParameterOffset(
'lambda_electrostatics_core', exception_index,
(chargeProd_new - chargeProd_old), 0, 0
)
self._hybrid_system_forces['standard_nonbonded_force'].addExceptionParameterOffset(
'lambda_sterics_core', exception_index, 0,
(sigma_new - sigma_old), (epsilon_new - epsilon_old)
)
self._hybrid_system_forces['core_sterics_force'].addExclusion(
index1_hybrid, index2_hybrid
)
# Now, loop through the new system to collect remaining interactions.
# The only that remain here are uniquenew-uniquenew, uniquenew-core,
# and uniquenew-environment. There might also be core-core, since not
# all core-core exceptions exist in both
for exception_pair, exception_parameters in self._new_system_exceptions.items():
[index1_new, index2_new] = exception_pair
[chargeProd_new, sigma_new, epsilon_new] = exception_parameters
# Get hybrid indices:
index1_hybrid = self._new_to_hybrid_map[index1_new]
index2_hybrid = self._new_to_hybrid_map[index2_new]
index_set = {index1_hybrid, index2_hybrid}
# If it's a subset of unique_new_atoms, then this is an
# intra-unique interaction and should have its exceptions
# specified in the regular nonbonded force. However, this is
# handled elsewhere as above due to peculiarities with exception
# handling
if index_set.issubset(self._atom_classes['unique_new_atoms']):
continue
# Look for the final class- interactions between uniquenew-core and
# uniquenew-environment. They are treated similarly: they are
# simply on and constant the entire time (as a valence term)
elif len(index_set.intersection(self._atom_classes['unique_new_atoms'])) > 0:
if self._interpolate_14s:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
index1_hybrid, index2_hybrid, chargeProd_new*0.0,
sigma_new, epsilon_new*0.0
)
else:
self._hybrid_system_forces['standard_nonbonded_force'].addException(
index1_hybrid, index2_hybrid, chargeProd_new,
sigma_new, epsilon_new
)
self._hybrid_system_forces['core_sterics_force'].addExclusion(
index1_hybrid, index2_hybrid
)
# However, there may be a core exception that exists in one system
# but not the other (ring closure)
elif index_set.issubset(self._atom_classes['core_atoms']):
# Get the old indices
try:
index1_old = self._new_to_old_map[index1_new]
index2_old = self._new_to_old_map[index2_new]
except KeyError:
continue
# See if it's also in the old nonbonded force. if it is, then we don't need to add it.
# But if it's not, we need to interpolate
if not self._find_exception(old_system_nonbonded_force, index1_old, index2_old):
[charge1_old, sigma1_old, epsilon1_old] = old_system_nonbonded_force.getParticleParameters(index1_old)
[charge2_old, sigma2_old, epsilon2_old] = old_system_nonbonded_force.getParticleParameters(index2_old)
chargeProd_old = charge1_old*charge2_old
sigma_old = 0.5 * (sigma1_old + sigma2_old)
epsilon_old = unit.sqrt(epsilon1_old*epsilon2_old)
exception_index = self._hybrid_system_forces['standard_nonbonded_force'].addException(
index1_hybrid, index2_hybrid,
chargeProd_old, sigma_old,
epsilon_old)
self._hybrid_system_forces['standard_nonbonded_force'].addExceptionParameterOffset(
'lambda_electrostatics_core', exception_index,
(chargeProd_new - chargeProd_old), 0, 0
)
self._hybrid_system_forces['standard_nonbonded_force'].addExceptionParameterOffset(
'lambda_sterics_core', exception_index, 0,
(sigma_new - sigma_old), (epsilon_new - epsilon_old)
)
self._hybrid_system_forces['core_sterics_force'].addExclusion(
index1_hybrid, index2_hybrid
)
def _handle_old_new_exceptions(self):
    """
    Find the exceptions associated with old-old and old-core interactions,
    as well as new-new and new-core interactions. These exceptions will
    be placed in a CustomBondForce that will interpolate electrostatics
    and a softcore potential.

    The force is created, named after its class with an ``_exceptions``
    suffix, added to the hybrid system and stored under
    ``self._hybrid_system_forces['old_new_exceptions_force']``.

    Bonds are only added when ``self._interpolate_14s`` is set. In that
    case one bond is added per end state exception that involves at
    least one unique atom of that end state and whose charge product or
    epsilon is non-zero. The per-bond parameters are
    ``[chargeProd, sigmaA, epsilonA, sigmaB, epsilonB, unique_old,
    unique_new]``:

    * old system exception:
      ``[chargeProd, sigma, epsilon, sigma, 0, 1, 0]``
    * new system exception:
      ``[chargeProd, sigma, 0, sigma, epsilon, 0, 1]``

    TODO
    ----
    * Move old_new_bond_exceptions to a dictionary or similar.
    """
    # Sterics part of the energy expression, depending on the softcore
    # flavour in use
    if self._softcore_LJ_v2:
        sterics_terms = [
            "U_sterics = select(step(r - r_LJ), 4*epsilon*x*(x-1.0), U_sterics_quad);",
            "U_sterics_quad = Force*(((r - r_LJ)^2)/2 - (r - r_LJ)) + U_sterics_cut;",
            "U_sterics_cut = 4*epsilon*((sigma/r_LJ)^6)*(((sigma/r_LJ)^6) - 1.0);",
            "Force = -4*epsilon*((-12*sigma^12)/(r_LJ^13) + (6*sigma^6)/(r_LJ^7));",
            "x = (sigma/r)^6;",
            "r_LJ = softcore_alpha*((26/7)*(sigma^6)*lambda_sterics_deprecated)^(1/6);",
            "lambda_sterics_deprecated = new_interaction*(1.0 - lambda_sterics_insert) + old_interaction*lambda_sterics_delete;",
        ]
    else:
        sterics_terms = [
            "U_sterics = 4*epsilon*x*(x-1.0); x = (sigma/reff_sterics)^6;",
            # effective softcore distance for sterics (this definition
            # used to be appended twice; once is enough)
            "reff_sterics = sigma*((softcore_alpha*lambda_alpha + (r/sigma)^6))^(1/6);",
            "lambda_alpha = new_interaction*(1-lambda_sterics_insert) + old_interaction*lambda_sterics_delete;",
        ]

    shared_terms = [
        "U_electrostatics = (lambda_electrostatics_insert * unique_new + unique_old * (1 - lambda_electrostatics_delete)) * ONE_4PI_EPS0*chargeProd/r;",
        "ONE_4PI_EPS0 = %f;" % ONE_4PI_EPS0,
        # interpolation
        "epsilon = (1-lambda_sterics)*epsilonA + lambda_sterics*epsilonB;",
        "sigma = (1-lambda_sterics)*sigmaA + lambda_sterics*sigmaB;",
        "lambda_sterics = new_interaction*lambda_sterics_insert + old_interaction*lambda_sterics_delete;",
        "new_interaction = delta(1-unique_new); old_interaction = delta(1-unique_old);",
    ]

    old_new_nonbonded_exceptions = "".join(
        ["U_electrostatics + U_sterics;"] + sterics_terms + shared_terms
    )

    nonbonded_exceptions_force = openmm.CustomBondForce(
        old_new_nonbonded_exceptions)
    name = f"{nonbonded_exceptions_force.__class__.__name__}_exceptions"
    nonbonded_exceptions_force.setName(name)
    self._hybrid_system.addForce(nonbonded_exceptions_force)

    # For reference, set name in force dict
    self._hybrid_system_forces['old_new_exceptions_force'] = nonbonded_exceptions_force

    if self._softcore_LJ_v2:
        nonbonded_exceptions_force.addGlobalParameter(
            "softcore_alpha", self._softcore_LJ_v2_alpha
        )
    else:
        nonbonded_exceptions_force.addGlobalParameter(
            "softcore_alpha", self._softcore_alpha
        )

    # electrostatics insert / delete, then sterics insert / delete
    for global_name in ("lambda_electrostatics_insert",
                        "lambda_electrostatics_delete",
                        "lambda_sterics_insert",
                        "lambda_sterics_delete"):
        nonbonded_exceptions_force.addGlobalParameter(global_name, 0.0)

    for parameter in ['chargeProd', 'sigmaA', 'epsilonA', 'sigmaB',
                      'epsilonB', 'unique_old', 'unique_new']:
        nonbonded_exceptions_force.addPerBondParameter(parameter)

    unique_old_atoms = self._atom_classes['unique_old_atoms']
    unique_new_atoms = self._atom_classes['unique_new_atoms']

    # First, loop through the old system's exceptions and add them to the
    # hybrid appropriately:
    for exception_pair, exception_parameters in self._old_system_exceptions.items():
        [index1_old, index2_old] = exception_pair
        [chargeProd_old, sigma_old, epsilon_old] = exception_parameters

        # Get hybrid indices:
        index1_hybrid = self._old_to_hybrid_map[index1_old]
        index2_hybrid = self._old_to_hybrid_map[index2_old]
        index_set = {index1_hybrid, index2_hybrid}

        # Only exceptions touching a unique old atom that actually carry
        # an interaction are of interest
        if (len(index_set.intersection(unique_old_atoms)) > 0 and
                (chargeProd_old.value_in_unit_system(unit.md_unit_system) != 0.0 or
                 epsilon_old.value_in_unit_system(unit.md_unit_system) != 0.0)):
            if self._interpolate_14s:
                # If we are interpolating 1,4s, then we anneal this term
                # off; otherwise, the exception force is constant and
                # already handled in the standard nonbonded force
                nonbonded_exceptions_force.addBond(
                    index1_hybrid, index2_hybrid,
                    [chargeProd_old, sigma_old, epsilon_old, sigma_old,
                     epsilon_old*0.0, 1, 0]
                )

    # Next, loop through the new system's exceptions and add them to the
    # hybrid appropriately
    for exception_pair, exception_parameters in self._new_system_exceptions.items():
        [index1_new, index2_new] = exception_pair
        [chargeProd_new, sigma_new, epsilon_new] = exception_parameters

        # Get hybrid indices:
        index1_hybrid = self._new_to_hybrid_map[index1_new]
        index2_hybrid = self._new_to_hybrid_map[index2_new]
        index_set = {index1_hybrid, index2_hybrid}

        # Only exceptions touching a unique new atom that actually carry
        # an interaction are of interest
        if (len(index_set.intersection(unique_new_atoms)) > 0 and
                (chargeProd_new.value_in_unit_system(unit.md_unit_system) != 0.0 or
                 epsilon_new.value_in_unit_system(unit.md_unit_system) != 0.0)):
            if self._interpolate_14s:
                # If we are interpolating 1,4s, then we anneal this term
                # on; otherwise, the exception force is constant and
                # already handled in the standard nonbonded force
                nonbonded_exceptions_force.addBond(
                    index1_hybrid, index2_hybrid,
                    [chargeProd_new, sigma_new, epsilon_new*0.0,
                     sigma_new, epsilon_new, 0, 1]
                )
def _compute_hybrid_positions(self):
    """
    Build the positions of the hybrid system from both end states.

    The result has one row per particle of the hybrid system. Old system
    coordinates are written first, then new system coordinates, each to
    the hybrid index given by the corresponding index map. Atoms present
    in both maps therefore end up with the new system coordinates; the
    assumption is that the positions common to old and new are the same
    (which is the case for perses as-is).

    Returns
    -------
    hybrid_positions : np.ndarray [n, 3]
        Positions of the hybrid system, in nm
    """
    def _as_nanometer_array(positions):
        # Strip the units, keeping the numbers in nanometers
        return np.array(positions.value_in_unit(unit.nanometer))

    old_coordinates = _as_nanometer_array(self._old_positions)
    new_coordinates = _as_nanometer_array(self._new_positions)

    n_atoms_hybrid = self._hybrid_system.getNumParticles()
    hybrid_pos_array = np.zeros([n_atoms_hybrid, 3])

    # Order matters: the new system is written last and so overwrites
    # the old coordinates of the shared atoms
    sources = (
        (self._old_to_hybrid_map, old_coordinates),
        (self._new_to_hybrid_map, new_coordinates),
    )
    for index_map, coordinates in sources:
        for end_state_idx, hybrid_idx in index_map.items():
            hybrid_pos_array[hybrid_idx, :] = coordinates[end_state_idx, :]

    return unit.Quantity(hybrid_pos_array, unit=unit.nanometers)
def _create_mdtraj_topology(self):
    """
    Create an MDTraj topology of the hybrid system.

    The hybrid topology starts as a deep copy of the old-state topology.
    Each unique new atom is then added to the old-state residue that holds
    the mapped (core) atoms of its new-state residue, and every new-state
    bond involving a unique new atom is added on top.

    Note
    ----
    This is purely for writing out trajectories and is not expected to be
    parametrized.

    TODO
    ----
    * A lot of this can be simplified / reworked.

    Returns
    -------
    hybrid_topology : mdtraj.Topology
        The old-state topology extended with the unique new atoms and
        their bonds.
    """
    old_top = mdt.Topology.from_openmm(self._old_topology)
    new_top = mdt.Topology.from_openmm(self._new_topology)
    hybrid_topology = copy.deepcopy(old_top)

    # hybrid index -> atom object appended to the hybrid topology
    appended = dict()

    # Core atoms expressed in the *new* index system
    core_new_indices = set(self._core_old_to_new_map.values())

    # Append the unique new atoms (same order as in the system)
    for new_idx in self._unique_new_atoms:
        hybrid_idx = self._new_to_hybrid_map[new_idx]
        new_atom = new_top.atom(new_idx)

        # All atoms sharing the new-state residue of this atom
        residue_atom_indices = {a.index for a in new_atom.residue.atoms}

        # Those of them that are mapped; they must be core atoms since
        # they are mapped and belong to a changing residue
        mapped_new = core_new_indices.intersection(residue_atom_indices)

        # Translate to old indices; any of them identifies the residue in
        # the old system, so the first one is used
        mapped_old = [self._new_to_old_map[idx] for idx in mapped_new]
        anchor_atom = hybrid_topology.atom(mapped_old[0])

        appended[hybrid_idx] = hybrid_topology.add_atom(
            new_atom.name,
            new_atom.element,
            anchor_atom.residue)

    unique_new = self._atom_classes['unique_new_atoms']

    def _resolve(hybrid_idx):
        # Unique new atoms were appended above; everything else already
        # exists in the copy of the old topology
        if hybrid_idx in unique_new:
            return appended[hybrid_idx]
        return hybrid_topology.atom(self._hybrid_to_old_map[hybrid_idx])

    # Add every new-state bond that touches at least one unique new atom
    for bonded1, bonded2 in new_top.bonds:
        idx1 = self._new_to_hybrid_map[bonded1.index]
        idx2 = self._new_to_hybrid_map[bonded2.index]
        if idx1 not in unique_new and idx2 not in unique_new:
            continue
        first = _resolve(idx1)
        second = _resolve(idx2)
        hybrid_topology.add_bond(first, second)

    return hybrid_topology
def _create_hybrid_topology(self):
    """
    Build a hybrid openmm.app.Topology from the old and new Topologies.

    Atoms are laid out in hybrid particle order: a hybrid index found in
    ``self._hybrid_to_old_map`` takes its atom from the old Topology,
    any other index takes it from the new Topology. Bonds are all the old
    Topology bonds plus the new Topology bonds that involve a unique new
    atom.

    Note
    ----
    * This is not intended for parameterisation purposes, but instead
      for system visualisation.
    * Unlike the MDTraj Topology object, the residues of the alchemical
      species are not squashed.

    Returns
    -------
    hybrid_top : app.Topology
    """
    hybrid_top = app.Topology()

    # Materialise the atom lists once so they can be indexed cheaply
    old_atoms = list(self._old_topology.atoms())
    new_atoms = list(self._new_topology.atoms())

    # Source atom (old or new) for each hybrid particle, in hybrid order
    source_atoms = []
    for pidx in range(self.hybrid_system.getNumParticles()):
        if pidx in self._hybrid_to_old_map:
            source_atoms.append(old_atoms[self._hybrid_to_old_map[pidx]])
        else:
            source_atoms.append(new_atoms[self._hybrid_to_new_map[pidx]])

    # Walk the atoms, opening a new chain / residue whenever the source
    # chain / residue differs from the one seen on the previous atom
    last_chain = None
    last_residue = None
    for atom in source_atoms:
        residue = atom.residue
        if residue.chain != last_chain:
            hybrid_chain = hybrid_top.addChain()
            last_chain = residue.chain
        if residue != last_residue:
            hybrid_residue = hybrid_top.addResidue(
                residue.name, hybrid_chain, residue.id
            )
            last_residue = residue
        hybrid_top.addAtom(atom.name, atom.element, hybrid_residue, atom.id)

    # Index the freshly created atoms once for the bond loops below
    hybrid_atoms = list(hybrid_top.atoms())

    # Every bond of the old Topology is carried over
    for bond in self._old_topology.bonds():
        first = self.old_to_hybrid_atom_map[bond.atom1.index]
        second = self.old_to_hybrid_atom_map[bond.atom2.index]
        hybrid_top.addBond(
            hybrid_atoms[first], hybrid_atoms[second],
            bond.type, bond.order,
        )

    # From the new Topology only bonds touching a unique new atom are added
    unique_new = self._atom_classes['unique_new_atoms']
    for bond in self._new_topology.bonds():
        first = self.new_to_hybrid_atom_map[bond.atom1.index]
        second = self.new_to_hybrid_atom_map[bond.atom2.index]
        if first in unique_new or second in unique_new:
            hybrid_top.addBond(
                hybrid_atoms[first], hybrid_atoms[second],
                bond.type, bond.order,
            )

    return hybrid_top
def old_positions(self, hybrid_positions):
    """
    Extract the old-system positions from a set of hybrid positions.

    Parameters
    ----------
    hybrid_positions : [n, 3] np.ndarray or unit.Quantity
        The positions of the hybrid system. Input that is not already a
        ``unit.Quantity`` is wrapped as nanometers.

    Returns
    -------
    old_positions : [m, 3] np.ndarray with unit
        The positions of the old system, in nanometers, with one row per
        particle of the old system.
    """
    n_atoms_old = self._old_system.getNumParticles()

    # Normalise the input to a Quantity so indexing keeps the units
    if isinstance(hybrid_positions, unit.Quantity):
        hybrid_with_units = hybrid_positions
    else:
        hybrid_with_units = unit.Quantity(hybrid_positions,
                                          unit=unit.nanometer)

    extracted = unit.Quantity(np.zeros([n_atoms_old, 3]),
                              unit=unit.nanometer)
    for old_idx in range(n_atoms_old):
        source_row = self._old_to_hybrid_map[old_idx]
        extracted[old_idx, :] = hybrid_with_units[source_row, :]
    return extracted
def new_positions(self, hybrid_positions):
    """
    Extract the new-system positions from a set of hybrid positions.

    Parameters
    ----------
    hybrid_positions : [n, 3] np.ndarray or unit.Quantity
        The positions of the hybrid system. Input that is not already a
        ``unit.Quantity`` is wrapped as nanometers.

    Returns
    -------
    new_positions : [m, 3] np.ndarray with unit
        The positions of the new system, in nanometers, with one row per
        particle of the new system.
    """
    n_atoms_new = self._new_system.getNumParticles()

    # Normalise the input to a Quantity so indexing keeps the units
    if isinstance(hybrid_positions, unit.Quantity):
        hybrid_with_units = hybrid_positions
    else:
        hybrid_with_units = unit.Quantity(hybrid_positions,
                                          unit=unit.nanometer)

    extracted = unit.Quantity(np.zeros([n_atoms_new, 3]),
                              unit=unit.nanometer)
    for new_idx in range(n_atoms_new):
        source_row = self._new_to_hybrid_map[new_idx]
        extracted[new_idx, :] = hybrid_with_units[source_row, :]
    return extracted
@property
def hybrid_system(self):
    """
    Read-only access to the hybrid system held by this object.

    Returns
    -------
    hybrid_system : openmm.System
        The system representing a hybrid between old and new topologies
    """
    return self._hybrid_system
@property
def new_to_hybrid_atom_map(self):
    """
    Dictionary mapping new system atom indices to hybrid system indices.

    The stored dictionary itself is returned, not a copy.

    Returns
    -------
    new_to_hybrid_atom_map : dict of {int, int}
        The mapping of atoms from the new system to the hybrid
    """
    return self._new_to_hybrid_map
@property
def old_to_hybrid_atom_map(self):
    """
    Dictionary mapping old system atom indices to hybrid system indices.

    The stored dictionary itself is returned, not a copy.

    Returns
    -------
    old_to_hybrid_atom_map : dict of {int, int}
        The mapping of atoms from the old system to the hybrid
    """
    return self._old_to_hybrid_map
@property
def hybrid_positions(self):
    """
    The stored positions of the hybrid system.

    Dimensionality is (n_environment + n_core + n_old_unique +
    n_new_unique). The positions are assigned by first copying all the
    mapped positions from the old system in, then copying the mapped
    positions from the new system.

    Returns
    -------
    hybrid_positions : [n, 3] Quantity nanometers
    """
    return self._hybrid_positions
@property
def hybrid_topology(self):
    """
    The MDTraj hybrid topology, meant for writing out trajectories.

    This topology is not expected to be parameterizable by the openmm
    forcefield class.

    Returns
    -------
    hybrid_topology : mdtraj.Topology
    """
    return self._hybrid_topology
@property
def omm_hybrid_topology(self):
    """
    The hybrid topology in OpenMM format.

    Like the MDTraj variant, this cannot be used to parameterize a system,
    only to write out trajectories.

    Returns
    -------
    hybrid_topology : app.Topology

    .. versionchanged:: OpenFE 0.11
       Now returns a Topology directly constructed from the input
       old / new Topologies, instead of trying to roundtrip an
       mdtraj topology.
    """
    return self._omm_hybrid_topology
@property
def has_virtual_sites(self):
    """
    Report whether any particle of the hybrid system is a virtual site.

    Returns
    -------
    bool
        ``True`` if there are virtual sites, otherwise ``False``.
    """
    system = self._hybrid_system
    # any() stops at the first virtual site, like an early return would
    return any(
        system.isVirtualSite(ix) for ix in range(system.getNumParticles())
    )
================================================
FILE: src/openfe/protocols/openmm_rfe/_rfe_utils/topologyhelpers.py
================================================
# This code is in parts based on TopologyProposal in perses
# (https://github.com/choderalab/perses)
# The eventual goal is to move this to the OpenFE alchemical topology
# building toolsets.
# LICENSE: MIT
# turn off formatting since this is mostly vendored code
# fmt: off
import itertools
import logging
import warnings
from copy import deepcopy
from typing import Optional, Union
import mdtraj as mdt
import numpy as np
import numpy.typing as npt
from mdtraj.core.residue_names import _SOLVENT_TYPES
from openff.units import Quantity, unit
from openmm import NonbondedForce, System, app
from openmm import unit as omm_unit
from openfe import SolventComponent
logger = logging.getLogger(__name__)
def _get_ion_and_water_parameters(
    topology: app.Topology,
    system: System,
    ion_resname: str,
    water_resname: str = 'HOH',
):
    """
    Look up the nonbonded parameters of an ion and of a water molecule.

    The first atom of a residue named ``ion_resname`` and the first oxygen
    and hydrogen atoms of a residue named ``water_resname`` are located in
    ``topology``; their parameters are then read from the first
    ``NonbondedForce`` found in ``system``.

    Parameters
    ----------
    topology : app.Topology
        The topology to search for the ion and water
    system : System
        The system associated with the input topology object.
    ion_resname : str
        The residue name of the ion to get parameters for
    water_resname : str
        The residue name of the water to get parameters for. Default 'HOH'.

    Returns
    -------
    ion_charge
        The partial charge of the ion atom
    ion_sigma
        The NonbondedForce sigma parameter of the ion atom
    ion_epsilon
        The NonbondedForce epsilon parameter of the ion atom
    o_charge
        The partial charge of the water oxygen.
    h_charge
        The partial charge of the water hydrogen.

    Raises
    ------
    ValueError
        If there are no ``ion_resname`` or ``water_resname`` named residues
        (with the requested element) in the input ``topology``.

    Attribution
    -----------
    Based on `perses.utils.charge_changing.get_ion_and_water_parameters`.
    """
    def _find_atom(resname, elementname):
        # Index of the first atom in a residue called ``resname``; when an
        # element symbol is given the atom must also be of that element.
        for atom in topology.atoms():
            if atom.residue.name != resname:
                continue
            if elementname is None or atom.element.symbol == elementname:
                return atom.index

        errmsg = ("Error encountered when attempting to explicitly handle "
                  "charge changes using an alchemical water. No residue "
                  f"named: {resname} found, with element {elementname}")
        raise ValueError(errmsg)

    ion_index = _find_atom(ion_resname, None)
    oxygen_index = _find_atom(water_resname, 'O')
    hydrogen_index = _find_atom(water_resname, 'H')

    # Only the first NonbondedForce of the system is consulted
    nonbonded_forces = [force for force in system.getForces()
                        if isinstance(force, NonbondedForce)]
    nbf = nonbonded_forces[0]

    ion_charge, ion_sigma, ion_epsilon = nbf.getParticleParameters(ion_index)
    o_charge, _, _ = nbf.getParticleParameters(oxygen_index)
    h_charge, _, _ = nbf.getParticleParameters(hydrogen_index)

    return ion_charge, ion_sigma, ion_epsilon, o_charge, h_charge
def _fix_alchemical_water_atom_mapping(
    system_mapping: dict[str, Union[dict[int, int], list[int]]],
    b_idx: int,
) -> None:
    """
    Move one atom from the environment maps to the core maps, in place.

    Used to account for an alchemical water: the state A partner of
    ``b_idx`` is looked up in ``system_mapping['new_to_old_atom_map']``,
    the pair is removed from both environment maps and registered in both
    core maps.

    Parameters
    ----------
    system_mapping : dict
        Dictionary of system mappings.
    b_idx : int
        The index of the state B particle.

    Raises
    ------
    KeyError
        If ``b_idx`` is not mapped, or the pair is missing from one of the
        environment maps.
    """
    a_idx = system_mapping['new_to_old_atom_map'][b_idx]

    # Note, because these are already shared positions, the alchemical
    # molecule index lists (`old_mol_indices` / `new_mol_indices`) are
    # deliberately left untouched.

    # the atom no longer counts as environment ...
    del system_mapping['old_to_new_env_atom_map'][a_idx]
    del system_mapping['new_to_old_env_atom_map'][b_idx]

    # ... it is now part of the core, in both directions
    system_mapping['old_to_new_core_atom_map'][a_idx] = b_idx
    system_mapping['new_to_old_core_atom_map'][b_idx] = a_idx
def handle_alchemical_waters(
    water_resids: list[int], topology: app.Topology,
    system: System, system_mapping: dict,
    charge_difference: int,
    solvent_component: SolventComponent,
):
    """
    Turn a pre-defined list of waters into alchemical ions.

    For every residue of ``topology`` whose index is in ``water_resids``,
    the atom carrying the water oxygen charge receives the ion parameters
    and the atoms carrying the water hydrogen charge have their charge set
    to zero (sigma and epsilon are kept). Both ``system`` (through its
    NonbondedForce) and ``system_mapping`` are modified in place; each
    touched atom is moved from the environment to the core atom maps.

    Parameters
    ----------
    water_resids : list[int]
        A list of alchemical water residue indices.
    topology : app.Topology
        The topology to search for the ion and water
    system : System
        The system associated with the input topology object.
    system_mapping : dictionary
        A dictionary of system mappings between the stateA and stateB systems
    charge_difference : int
        The charge difference between state A and state B. A positive
        value selects the positive ion, a negative value the negative ion,
        and zero makes this function return without doing anything.
    solvent_component : SolventComponent
        Provides ``positive_ion`` and ``negative_ion``; the ion residue
        name is the chosen one stripped of ``+`` / ``-`` and upper-cased.
        Water residues are always looked up as 'HOH'.

    Raises
    ------
    ValueError
        If the absolute charge difference is not equal to the number of
        alchemical water resids.
        If more than one NonbondedForce is found in ``system``.
        If the chosen alchemical water has virtual sites (i.e. is not
        a 3 site water molecule).
        If an atom of a chosen water matches neither the water oxygen nor
        the water hydrogen charge.

    Attribution
    -----------
    Based on `perses.utils.charge_changing.transform_waters_into_ions`.
    """
    if abs(charge_difference) != len(water_resids):
        errmsg = ("There should be as many alchemical water residues: "
                  f"{len(water_resids)} as the absolute charge "
                  f"difference: {abs(charge_difference)}")
        raise ValueError(errmsg)

    # Pick the ion from the sign of the charge difference; with no charge
    # difference there is nothing to do
    if charge_difference > 0:
        ion_name = solvent_component.positive_ion
    elif charge_difference < 0:
        ion_name = solvent_component.negative_ion
    else:
        return None
    ion_resname = ion_name.strip('-+').upper()

    parameters = _get_ion_and_water_parameters(
        topology, system, ion_resname,
        'HOH',  # Modeller always adds HOH waters
    )
    ion_charge, ion_sigma, ion_epsilon, o_charge, h_charge = parameters

    # Exactly one NonbondedForce is expected
    nonbonded_forces = [force for force in system.getForces()
                        if isinstance(force, NonbondedForce)]
    if len(nonbonded_forces) > 1:
        raise ValueError("Too many NonbondedForce forces found")
    nbf = nonbonded_forces[0]

    for res in topology.residues():
        if res.index not in water_resids:
            continue

        # more than 3 atoms means virtual sites, which are not supported
        if len(list(res.atoms())) > 3:
            errmsg = ("Non 3-site waters (i.e. waters with virtual sites) "
                      "are not currently supported as alchemical waters")
            raise ValueError(errmsg)

        for at in res.atoms():
            idx = at.index
            charge, sigma, epsilon = nbf.getParticleParameters(idx)
            _fix_alchemical_water_atom_mapping(system_mapping, idx)

            if charge == o_charge:
                # the oxygen becomes the ion
                nbf.setParticleParameters(
                    idx, ion_charge, ion_sigma, ion_epsilon
                )
                continue

            if charge != h_charge:
                errmsg = ("modifying an atom that doesn't match known "
                          "water parameters")
                raise ValueError(errmsg)

            # hydrogens only lose their charge
            nbf.setParticleParameters(idx, 0.0, sigma, epsilon)
def get_alchemical_waters(
    topology: app.Topology,
    positions: npt.NDArray,
    charge_difference: int,
    distance_cutoff: Quantity = 0.8 * unit.nanometer,
) -> list[int]:
    """
    Pick a list of waters to be used for alchemical charge correction.

    Waters are eligible when at least one of their atoms lies outside of
    ``distance_cutoff`` from every solute atom (anything whose residue
    name is not an MDTraj solvent type). The eligible waters with the
    lowest residue indices are returned, so that repeated calls on the
    same input always select the same waters.

    Parameters
    ----------
    topology : openmm.app.Topology
        The topology to search for an alchemical water.
    positions : npt.NDArray
        The coordinates of the atoms associated with the ``topology``.
    charge_difference : int
        The charge difference between the two end states
        calculated as stateA_formal_charge - stateB_formal_charge.
    distance_cutoff : openff.units.Quantity
        The minimum distance away from the solutes from which an alchemical
        water can be chosen.

    Returns
    -------
    chosen_residues : list[int]
        A list of residue indices for each chosen alchemical water, in
        ascending order. At most ``abs(charge_difference)`` entries; empty
        when ``charge_difference`` is 0.

    Raises
    ------
    ValueError
        If no water lies outside of ``distance_cutoff`` of the solutes.

    Notes
    -----
    Based off perses.utils.charge_changing.get_water_indices.
    """
    # if the charge difference is 0 then no waters are needed
    # return early with an empty list
    if charge_difference == 0:
        return []

    cutoff_nm = distance_cutoff.to(unit.nanometer).m

    # construct a new mdt trajectory
    traj = mdt.Trajectory(
        positions[np.newaxis, ...],
        mdt.Topology.from_openmm(topology)
    )

    water_atoms = traj.topology.select("water")

    # a set gives constant-time residue name checks in the loop below
    solvent_residue_names = set(_SOLVENT_TYPES)
    solute_atoms = [atom.index for atom in traj.topology.atoms
                    if atom.residue.name not in solvent_residue_names]

    excluded_waters = mdt.compute_neighbors(
        traj, cutoff_nm,
        solute_atoms, haystack_indices=water_atoms,
        periodic=True,
    )[0]

    # Membership tests against arrays are linear scans; doing them once
    # per atom made this step quadratic in the system size. Work on sets.
    eligible_atoms = ({int(i) for i in water_atoms}
                      - {int(i) for i in excluded_waters})

    solvent_indices = {
        atom.residue.index for atom in traj.topology.atoms
        if atom.index in eligible_atoms
    }

    if len(solvent_indices) < 1:
        errmsg = ("There are no waters outside of a "
                  f"{cutoff_nm} nanometer distance "
                  "of the system solutes to be used as alchemical waters")
        raise ValueError(errmsg)

    # unlike the original perses approach, we stick to the lowest water
    # indices so that the same waters are picked reproducibly; a set has
    # no guaranteed iteration order, hence the explicit sort
    chosen_residues = sorted(solvent_indices)[:abs(charge_difference)]

    return chosen_residues
def combined_topology(topology1: app.Topology,
                      topology2: app.Topology,
                      exclude_resids: Optional[npt.NDArray] = None,):
    """
    Create a new topology combining these two topologies.

    The chains, residues and atoms of ``topology1`` (minus the excluded
    residues) are copied first, followed by those of ``topology2``. Bonds
    are copied from both inputs unless they involve an excluded atom.

    The box information from the *first* topology will be copied over

    Parameters
    ----------
    topology1 : openmm.app.Topology
        Topology of the template system to graft topology2 into.
    topology2 : openmm.app.Topology
        Topology to combine (not in place) with topology1.
    exclude_resids : npt.NDArray
        Residue indices in topology 1 to exclude from the combined topology.
        ``None`` (the default) excludes nothing.

    Returns
    -------
    new : openmm.app.Topology
        The combined topology. Chains are given their running number in
        the combined topology as id; chains whose residues were all
        excluded are still created (empty).
    appended_resids : npt.NDArray
        Residue indices of the residues appended from topology2 in the new
        topology.
    """
    if exclude_resids is None:
        exclude_resids = np.array([])

    top = app.Topology()

    # Sets built once up front: the loops below used to scan a list of
    # excluded residues, and rebuild the list of topology2 residues, for
    # every single residue of the combined system.
    excluded_ids = set(np.asarray(exclude_resids).ravel().tolist())
    excluded_res = {
        r for r in topology1.residues() if r.index in excluded_ids
    }
    appended_res = set(topology2.residues())

    # get a set of all excluded atoms
    excluded_atoms = set(itertools.chain.from_iterable(
        r.atoms() for r in excluded_res)
    )

    # add new copies of selected chains, residues, and atoms; keep mapping
    # of old atoms to new for adding bonds later
    old_to_new_atom_map = {}
    appended_resids = []
    for chain_id, chain in enumerate(
            itertools.chain(topology1.chains(), topology2.chains())):
        # TODO: is chain ID int or str? I recall it being int in MDTraj....
        # are there any issues if we just add a blank chain?
        new_chain = top.addChain(chain_id)
        for residue in chain.residues():
            if residue in excluded_res:
                continue

            new_res = top.addResidue(residue.name,
                                     new_chain,
                                     residue.id)

            # record the new resindex if it's part of topology2
            if residue in appended_res:
                appended_resids.append(new_res.index)

            for atom in residue.atoms():
                new_atom = top.addAtom(atom.name,
                                       atom.element,
                                       new_res,
                                       atom.id)
                old_to_new_atom_map[atom] = new_atom

    # add bonds to the topology, dropping any that involve removed atoms
    for bond in itertools.chain(topology1.bonds(), topology2.bonds()):
        if bond.atom1 in excluded_atoms or bond.atom2 in excluded_atoms:
            continue
        top.addBond(old_to_new_atom_map[bond.atom1],
                    old_to_new_atom_map[bond.atom2],
                    bond.type,
                    bond.order)

    # Copy over the box vectors
    top.setPeriodicBoxVectors(topology1.getPeriodicBoxVectors())

    return top, np.array(appended_resids)
def _get_indices(topology, resids):
    """
    Collect the atom indices of the residues selected by ``resids``.

    Parameters
    ----------
    topology : openmm.app.Topology
        Topology to search from.
    resids : npt.NDArrayLike
        An array of residue indices which match the residues we want to get
        atom indices for.

    Returns
    -------
    list of int
        Atom indices, in topology residue order and, within a residue, in
        the order its atoms are listed.
    """
    return [
        atom.index
        for residue in topology.residues()
        if residue.index in resids
        for atom in residue.atoms()
    ]
def _remove_constraints(old_to_new_atom_map, old_system, old_topology,
                        new_system, new_topology):
    """
    Adapted from Perses' Topology Proposal. Adjusts atom mapping to account for
    any bonds that are constrained but change in length.

    A constraint between two mapped atoms invalidates a mapping entry when
    its length differs between the two systems, or when it only exists in
    one of them. In both cases the hydrogen of the pair is removed from the
    mapping.

    Parameters
    ----------
    old_to_new_atom_map : dict of int : int
        Atom mapping between the old and new systems.
    old_system : openmm.app.System
        System of the "old" alchemical state.
    old_topology : openmm.app.Topology
        Topology of the "old" alchemical state.
    new_system : openmm.app.System
        System of the "new" alchemical state.
    new_topology : openmm.app.Topology
        Topology of the "new" alchemical state.

    Returns
    -------
    no_const_old_to_new_atom_map : dict of int : int
        Adjusted version of the input mapping but with atoms involving changes
        in lengths of constrained bonds removed. The input mapping is not
        modified.

    Raises
    ------
    ValueError
        If neither atom of an offending constraint is a hydrogen, or if the
        same atom would have to be removed for more than one constraint.

    TODO
    ----
    * Can we drop having topologies as inputs here?
    """
    from collections import Counter

    no_const_old_to_new_atom_map = deepcopy(old_to_new_atom_map)

    # ``x in dict.values()`` is a linear scan; it used to run once per
    # topology atom and twice per constraint. Build the set once instead.
    mapped_new_atoms = set(old_to_new_atom_map.values())

    h_elem = app.Element.getByAtomicNumber(1)
    old_H_atoms = {i for i, atom in enumerate(old_topology.atoms())
                   if atom.element == h_elem and i in old_to_new_atom_map}
    new_H_atoms = {i for i, atom in enumerate(new_topology.atoms())
                   if atom.element == h_elem and i in mapped_new_atoms}

    def pick_H(i, j, x, y) -> int:
        """Identify which atom to remove to resolve constraint violation

        i maps to x, j maps to y

        Returns either i or j (whichever is H) to remove from mapping
        """
        if i in old_H_atoms or x in new_H_atoms:
            return i
        elif j in old_H_atoms or y in new_H_atoms:
            return j
        else:
            raise ValueError(f"Couldn't resolve constraint demapping for atoms"
                             f" A: {i}-{j} B: {x}-{y}")

    # constraints between two mapped atoms, keyed by the atom index pair
    old_constraints: dict[tuple[int, int], float] = dict()
    for idx in range(old_system.getNumConstraints()):
        atom1, atom2, length = old_system.getConstraintParameters(idx)

        if atom1 in old_to_new_atom_map and atom2 in old_to_new_atom_map:
            old_constraints[atom1, atom2] = length

    new_constraints: dict[tuple[int, int], float] = dict()
    for idx in range(new_system.getNumConstraints()):
        atom1, atom2, length = new_system.getConstraintParameters(idx)

        if atom1 in mapped_new_atoms and atom2 in mapped_new_atoms:
            new_constraints[atom1, atom2] = length

    # there are two reasons constraints would invalidate a mapping entry
    # 1) length of constraint changed (but both constrained)
    # 2) constraint removed to harmonic bond (only one constrained)
    to_del = []
    for (i, j), l_old in old_constraints.items():
        x, y = old_to_new_atom_map[i], old_to_new_atom_map[j]

        # the new system may list the pair in either order
        try:
            l_new = new_constraints.pop((x, y))
        except KeyError:
            try:
                l_new = new_constraints.pop((y, x))
            except KeyError:
                # type 2) constraint doesn't exist in new system
                to_del.append(pick_H(i, j, x, y))
                continue

        # type 1) constraint length changed
        if l_old != l_new:
            to_del.append(pick_H(i, j, x, y))

    # iterate over new_constraints (we were .popping items out)
    # (if any left these are type 2))
    if new_constraints:
        new_to_old = {v: k for k, v in old_to_new_atom_map.items()}

        for x, y in new_constraints:
            i, j = new_to_old[x], new_to_old[y]

            to_del.append(pick_H(i, j, x, y))

    # count the number of times each atom appears
    to_del_counts = Counter(to_del)
    # if a H-atom appears more than once, it means it was involved in
    # multiple different constraints at the end states but that the atom is in the core region
    # this should not happen
    for idx, count in to_del_counts.items():
        if count > 1:
            # this is raised before we hit the KeyError below
            raise ValueError(f"Atom {idx} was involved in {count} unique constraints "
                             f" that changed between the two end-states. This should not happen for core "
                             f"atoms, please check your atom mapping. Please raise an issue on the openfe github with "
                             f"the steps to reproduce this error for more help.")

    for idx in to_del:
        del no_const_old_to_new_atom_map[idx]

    return no_const_old_to_new_atom_map
def get_system_mappings(old_to_new_atom_map,
                        old_system, old_topology, old_resids,
                        new_system, new_topology, new_resids,
                        fix_constraints=True):
    """
    From a starting alchemical map between two molecules, get the mappings
    between two alchemical end state systems.

    Optionally, also fixes the mapping to account for changes in bond
    lengths for constraints.

    Parameters
    ----------
    old_to_new_atom_map : dict of int : int
        Atom mapping between the old and new molecules. Indices are
        relative to the molecule; they are shifted by the index of the
        first atom of the alchemical residues in each topology.
    old_system : openmm.app.System
        System of the "old" alchemical state.
    old_topology : openmm.app.Topology
        Topology of the "old" alchemical state.
    old_resids : npt.NDArray
        Residue ids of the alchemical residues in the "old" topology.
    new_system : openmm.app.System
        System of the "new" alchemical state.
    new_topology : openmm.app.Topology
        Topology of the "new" alchemical state.
    new_resids : npt.NDArray
        Residue ids of the alchemical residues in the "new" topology.
    fix_constraints : bool, default True
        Whether to fix the atom mapping by removing any atoms which are
        involved in constrained bonds that change length across the alchemical
        change. When ``False`` a warning is emitted instead.

    Returns
    -------
    mappings : dictionary
        A dictionary with all the necessary mappings for the two systems.
        These include:
            1. new_to_old_atom_map
              This includes all the atoms mapped between the two systems
              (including non-core atoms, i.e. environment).
            2. old_to_new_atom_map
              The inverted dictionary of new_to_old_atom_map
            3. new_to_old_core_atom_map
              The inverted dictionary of old_to_new_core_atom_map
            4. old_to_new_core_atom_map
              The atom mapping of the "core" atoms (i.e. atoms in alchemical
              residues) between the old and new systems
            5. new_to_old_env_atom_map
              The atom mapping of solely the "environment" atoms between the
              new and old systems.
            6. old_to_new_env_atom_map
              The inverted dictionary of new_to_old_env_atom_map.
            7. old_mol_indices
              Indices of the alchemical molecule in the old system.
              Note: This will not contain the indices of any alchemical waters!
            8. new_mol_indices
              Indices of the alchemical molecule in the new system.
              Note: This will not contain the indices of any alchemical waters!

    Raises
    ------
    ValueError
        If the new topology holds atoms after the last atom of the new
        alchemical residues.
    """
    # Atom indices of the alchemical residues in each system
    old_at_indices = _get_indices(old_topology, old_resids)
    new_at_indices = _get_indices(new_topology, new_resids)

    # Atom indices are assumed to be linear within the residue, so the
    # molecule-relative map is shifted by the first atom index of each side
    core_old_to_new = {
        old_at_indices[0] + old_rel: new_at_indices[0] + new_rel
        for old_rel, new_rel in old_to_new_atom_map.items()
    }

    # TODO: the original intent here was to apply over the full mapping of all
    # the atoms in the two systems. For now we are only doing the alchemical
    # residues. We might want to change this as necessary in the future.
    if fix_constraints:
        core_old_to_new = _remove_constraints(
            core_old_to_new, old_system, old_topology,
            new_system, new_topology)
    else:
        wmsg = ("Not attempting to fix atom mapping to account for "
                "constraints. Please note that core atoms which have "
                "constrained bonds and changing bond lengths are not allowed.")
        warnings.warn(wmsg)

    # Because of how the topologies are appended, the ligand is expected to
    # be the last thing in the "new" topology; verify it - temp fix for now
    if any(at.index > new_at_indices[-1] for at in new_topology.atoms()):
        raise ValueError("residues are appended after the new ligand")

    # Every atom before the first new ligand atom is environment. It maps
    # onto the same index in the old system, except that indices from the
    # old ligand onwards are pushed back by the size of the old ligand.
    n_old_mol_atoms = len(old_at_indices)
    new_to_old_env_map = {}
    for new_idx in range(new_at_indices[0]):
        if new_idx >= old_at_indices[0]:
            new_to_old_env_map[new_idx] = new_idx + n_old_mol_atoms
        else:
            new_to_old_env_map[new_idx] = new_idx

    # The full map is the environment plus the (reversed) core map
    new_to_old_all_map = deepcopy(new_to_old_env_map)
    for old_idx, new_idx in core_old_to_new.items():
        new_to_old_all_map[new_idx] = old_idx

    def _inverted(mapping):
        # swap keys and values
        return {value: key for key, value in mapping.items()}

    mappings = {
        'new_to_old_atom_map': new_to_old_all_map,
        'old_to_new_atom_map': _inverted(new_to_old_all_map),
        'new_to_old_core_atom_map': _inverted(core_old_to_new),
        'old_to_new_core_atom_map': core_old_to_new,
        'new_to_old_env_atom_map': new_to_old_env_map,
        'old_to_new_env_atom_map': _inverted(new_to_old_env_map),
        'old_mol_indices': old_at_indices,
        'new_mol_indices': new_at_indices,
    }

    return mappings
def set_and_check_new_positions(mapping, old_topology, new_topology,
                                old_positions, insert_positions,
                                tolerance=1.0):
    """
    Utility to create new positions given a mapping, the old positions and
    the positions of the molecule being inserted, defined by
    `insert_positions`.

    Mapped atoms first receive their old positions; the atoms listed in
    ``mapping['new_mol_indices']`` are then overwritten with
    ``insert_positions``.

    This will also softly check that the mapped atoms of the old and new
    systems do not differ by more than the amount specified by `tolerance`
    along any axis. Offending pairs only trigger a warning (both through
    ``warnings`` and the module logger); nothing is raised.

    Parameters
    ----------
    mapping : dict
        Dictionary of system mappings; ``'old_to_new_atom_map'`` and
        ``'new_mol_indices'`` are used.
    old_topology : openmm.app.Topology
        Topology of the "old" alchemical state. Currently unused.
    new_topology : openmm.app.Topology
        Topology of the "new" alchemical state.
    old_positions : unit.Quantity
        Position of the "old" alchemical state.
    insert_positions : unit.Quantity
        Positions of the alchemically changing molecule in the "new" alchemical
        state.
    tolerance : float
        Warning threshold, in Angstrom, for deviations along any dimension
        (x,y,z) in mapped atoms between the "old" and "new" positions.
        Default 1.0.

    Returns
    -------
    new_positions : unit.Quantity
        Positions of the "new" alchemical state, in Angstrom, with one row
        per atom of ``new_topology``.
    """
    # Get the positions in Angstrom as raw numpy arrays; asarray makes the
    # fancy indexing below work even if the Quantity did not wrap an array
    old_pos_array = np.asarray(old_positions.value_in_unit(omm_unit.angstrom))
    add_pos_array = np.asarray(
        insert_positions.value_in_unit(omm_unit.angstrom))

    # Create empty ndarray of size atoms to hold the positions
    new_pos_array = np.zeros((new_topology.getNumAtoms(), 3))

    # get your mappings
    old_to_new = mapping['old_to_new_atom_map']
    new_idxs = list(old_to_new.values())
    old_idxs = list(old_to_new.keys())
    new_mol_idxs = mapping['new_mol_indices']

    # copy over the old positions for mapped atoms
    new_pos_array[new_idxs, :] = old_pos_array[old_idxs, :]

    # copy over the new alchemical molecule positions
    new_pos_array[new_mol_idxs, :] = add_pos_array

    # loop through all mapped atoms and make sure we don't deviate by more than
    # tolerance - not super necessary, but it's a nice sanity check
    for key, val in old_to_new.items():
        deviation = np.abs(new_pos_array[val] - old_pos_array[key])
        if np.any(deviation > tolerance):
            wmsg = f"mapping {key} : {val} deviates by more than {tolerance}"
            warnings.warn(wmsg)
            # use the module logger: the module-level logging.warning()
            # would implicitly configure the root logger as a side effect
            logger.warning(wmsg)

    return new_pos_array * omm_unit.angstrom
================================================
FILE: src/openfe/protocols/openmm_rfe/equil_rfe_methods.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""Equilibrium Relative Free Energy Protocol using OpenMM and OpenMMTools in a
Perses-like manner.
This module implements the necessary tooling to calculate the
relative free energy of a ligand transformation using OpenMM tools and one of
the following methods:
- Hamiltonian Replica Exchange
- Self-adjusted mixture sampling
- Independent window sampling
Acknowledgements
----------------
This Protocol is based on, and leverages components originating from
the Perses toolkit (https://github.com/choderalab/perses).
"""
from .equil_rfe_settings import RelativeHybridTopologyProtocolSettings
from .hybridtop_protocol_results import RelativeHybridTopologyProtocolResult
from .hybridtop_protocols import RelativeHybridTopologyProtocol
from .hybridtop_units import (
HybridTopologyMultiStateAnalysisUnit,
HybridTopologyMultiStateSimulationUnit,
HybridTopologySetupUnit,
)
================================================
FILE: src/openfe/protocols/openmm_rfe/equil_rfe_settings.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""Equilibrium Relative Free Energy Protocol input settings.
This module implements the settings necessary to run relative free
energy calculations using :mod:`openfe.protocols.openmm_rfe.equil_rfe_methods`
"""
from __future__ import annotations
from typing import Literal
from gufe.settings import (
OpenMMSystemGeneratorFFSettings,
Settings,
SettingsBaseModel,
ThermoSettings,
)
from gufe.settings.typing import NanometerQuantity
from openff.units import unit
from pydantic import ConfigDict, field_validator
from openfe.protocols.openmm_utils.omm_settings import (
IntegratorSettings,
MultiStateOutputSettings,
MultiStateSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSolvationSettings,
)
class LambdaSettings(SettingsBaseModel):
    """Lambda schedule settings.

    Settings controlling the lambda schedule, these include the switching
    function type, and the number of windows.
    """

    # The docstring has to be the first statement of the class body to be
    # registered as ``__doc__``; the model configuration therefore follows it.
    model_config = ConfigDict(extra="ignore", arbitrary_types_allowed=True)

    lambda_functions: str = "default"
    """
    Key of which switching functions to use for alchemical mutation.
    Default 'default'.
    """
    lambda_windows: int = 11
    """Number of lambda windows to calculate. Default 11."""
class AlchemicalSettings(SettingsBaseModel):
    """Settings for the alchemical protocol

    This describes the creation of the hybrid system.
    """

    # The docstring has to be the first statement of the class body to be
    # registered as ``__doc__``; the model configuration therefore follows it.
    model_config = ConfigDict(extra="ignore", arbitrary_types_allowed=True)

    endstate_dispersion_correction: bool = False
    """
    Whether to have extra unsampled endstate windows for long range
    correction. Default False.
    """
    # alchemical settings
    use_dispersion_correction: bool = False
    """
    Whether to use dispersion correction in the hybrid topology state.
    Default False.
    """
    softcore_LJ: Literal["gapsys", "beutler"]
    """
    Whether to use the LJ softcore function as defined by Gapsys et al.
    JCTC 2012, or the one by Beutler et al. Chem. Phys. Lett. 1994.
    No default is defined on this field, a value must always be supplied.
    """
    softcore_alpha: float = 0.85
    """Softcore alpha parameter. Default 0.85"""
    turn_off_core_unique_exceptions: bool = True
    """
    Whether to turn off interactions for new exceptions (not just 1,4s)
    at lambda 0 and old exceptions at lambda 1 between unique atoms and core
    atoms. If False they are present in the nonbonded force. Default True.
    """
    explicit_charge_correction: bool = False
    """
    Whether to explicitly account for a charge difference during the
    alchemical transformation by transforming a water to a counterion
    of the opposite charge of the formal charge difference.

    Please note that this feature is currently in beta and poorly tested.

    Absolute charge changes greater than 1 are
    currently not supported.

    Default False.
    """
    explicit_charge_correction_cutoff: NanometerQuantity = 0.8 * unit.nanometer
    """
    The minimum distance from the system solutes from which an
    alchemical water can be chosen. Default 0.8 * unit.nanometer.
    """
class RelativeHybridTopologyProtocolSettings(Settings):
    """Settings for the relative hybrid topology free energy Protocol.

    Groups the per-topic settings objects (force field, thermodynamics,
    solvation, partial charges, lambda schedule, alchemical options,
    sampler, engine, integrator and output) together with the number of
    independent protocol repeats.
    """

    protocol_repeats: int
    """
    The number of completely independent repeats of the entire sampling
    process. The mean of the repeats defines the final estimate of FE
    difference, while the standard deviation between repeats is used as
    the uncertainty.
    """

    @field_validator("protocol_repeats")
    @classmethod
    def must_be_positive(cls, v):
        """Reject zero or negative repeat counts."""
        if v <= 0:
            errmsg = f"protocol_repeats must be a positive value, got {v}."
            raise ValueError(errmsg)
        return v

    # Inherited things
    forcefield_settings: OpenMMSystemGeneratorFFSettings
    """Parameters to set up the force field with OpenMM Force Fields."""
    thermo_settings: ThermoSettings
    """Settings for thermodynamic parameters."""

    # Things for creating the systems
    solvation_settings: OpenMMSolvationSettings
    """Settings for solvating the system."""
    partial_charge_settings: OpenFFPartialChargeSettings
    """Settings for assigning partial charges to small molecules."""

    # Alchemical settings
    lambda_settings: LambdaSettings
    """
    Lambda protocol settings including lambda windows and lambda functions.
    """
    alchemical_settings: AlchemicalSettings
    """
    Alchemical protocol settings including soft core scaling.
    """
    simulation_settings: MultiStateSimulationSettings
    """
    Settings for alchemical sampler.
    """

    # MD Engine things
    engine_settings: OpenMMEngineSettings
    """Settings specific to the OpenMM engine such as the compute platform."""

    # Sampling State defining things
    integrator_settings: IntegratorSettings
    """Settings for the integrator such as timestep and barostat settings."""
    output_settings: MultiStateOutputSettings
    """
    Simulation output control settings.
    """
================================================
FILE: src/openfe/protocols/openmm_rfe/hybridtop_protocol_results.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""
ProtocolUnitResults for Hybrid Topology methods using
OpenMM and OpenMMTools in a Perses-like manner.
"""
import logging
import pathlib
import warnings
from typing import Optional, Union
import gufe
import numpy as np
import numpy.typing as npt
from openff.units import Quantity
from openmmtools import multistate
logger = logging.getLogger(__name__)
class RelativeHybridTopologyProtocolResult(gufe.ProtocolResult):
"""
Protocol results with the output of a RelativeHybridTopologyProtocol.
"""
def __init__(self, **data):
    """Store the gathered results and reject over-long repeat histories.

    Raises
    ------
    NotImplementedError
      If any repeat holds more than two ProtocolUnitResults, as stitching
      of extended simulations is not supported.
    """
    super().__init__(**data)
    # self.data maps str(repeat_id) -> list[protocolunitresults]
    # TODO: Detect when we have extensions and stitch these together?
    for unit_results in self.data.values():
        if len(unit_results) > 2:
            raise NotImplementedError("Can't stitch together results yet")
@staticmethod
def compute_mean_estimate(dGs: list[Quantity]) -> Quantity:
u = dGs[0].u
# convert all values to units of the first value, then take average of magnitude
# this would avoid an edge case where each value was in different units
vals = np.asarray([dG.to(u).m for dG in dGs])
return np.average(vals) * u
def get_estimate(self) -> Quantity:
    """Average free energy difference of this transformation

    Returns
    -------
    dG : openff.units.Quantity
      The free energy difference between the first and last states. This is
      a Quantity defined with units.
    """
    # TODO: Check this holds up completely for SAMS.
    # Only the first unit result of every repeat contributes an estimate.
    per_repeat = []
    for unit_results in self.data.values():
        per_repeat.append(unit_results[0].outputs["unit_estimate"])
    return self.compute_mean_estimate(per_repeat)
@staticmethod
def compute_uncertainty(dGs: list[Quantity]) -> Quantity:
u = dGs[0].u
# convert all values to units of the first value, then take average of magnitude
# this would avoid a screwy case where each value was in different units
vals = np.asarray([dG.to(u).m for dG in dGs])
return np.std(vals) * u
def get_uncertainty(self) -> Quantity:
    """The uncertainty/error in the dG value: The std of the estimates of
    each independent repeat
    """
    # Only the first unit result of every repeat contributes an estimate.
    per_repeat = []
    for unit_results in self.data.values():
        per_repeat.append(unit_results[0].outputs["unit_estimate"])
    return self.compute_uncertainty(per_repeat)
def get_individual_estimates(self) -> list[tuple[Quantity, Quantity]]:
    """Return a list of tuples containing the individual free energy
    estimates and associated MBAR errors for each repeat.

    Returns
    -------
    dGs : list[tuple[openff.units.Quantity]]
      n_replicate simulation list of tuples containing the free energy
      estimates (first entry) and associated MBAR estimate errors
      (second entry).
    """
    estimates = []
    for unit_results in self.data.values():
        outputs = unit_results[0].outputs
        estimates.append((outputs["unit_estimate"], outputs["unit_estimate_error"]))
    return estimates
def get_forward_and_reverse_energy_analysis(
    self,
) -> list[Optional[dict[str, Union[npt.NDArray, Quantity]]]]:
    """
    Get a list of forward and reverse analysis of the free energies
    for each repeat using uncorrelated production samples.

    The returned dicts have keys:
    'fractions' - the fraction of data used for this estimate
    'forward_DGs', 'reverse_DGs' - for each fraction of data, the estimate
    'forward_dDGs', 'reverse_dDGs' - for each estimate, the uncertainty

    The 'fractions' values are a numpy array, while the other arrays are
    Quantity arrays, with units attached.

    If the list entry is ``None`` instead of a dictionary, this indicates
    that the analysis could not be carried out for that repeat. This
    is most likely caused by MBAR convergence issues when attempting to
    calculate free energies from too few samples.

    Returns
    -------
    forward_reverse : list[Optional[dict[str, Union[npt.NDArray, openff.units.Quantity]]]]

    Warns
    -----
    UserWarning
      If any of the forward and reverse entries are ``None``.
    """
    analyses = []
    for unit_results in self.data.values():
        analyses.append(unit_results[0].outputs["forward_and_reverse_energies"])

    # A missing analysis is reported to the user but is not an error.
    if any(entry is None for entry in analyses):
        warnings.warn(
            "One or more ``None`` entries were found in the list of "
            "forward and reverse analyses. This is likely caused by "
            "an MBAR convergence failure caused by too few independent "
            "samples when calculating the free energies of the 10% "
            "timeseries slice."
        )
    return analyses
def get_overlap_matrices(self) -> list[dict[str, npt.NDArray]]:
    """
    Return a list of dictionary containing the MBAR overlap estimates
    calculated for each repeat.

    Returns
    -------
    overlap_stats : list[dict[str, npt.NDArray]]
      A list of dictionaries containing the following keys:
        * ``scalar``: One minus the largest nontrivial eigenvalue
        * ``eigenvalues``: The sorted (descending) eigenvalues of the
          overlap matrix
        * ``matrix``: Estimated overlap matrix of observing a sample from
          state i in state j
    """
    # One entry per repeat, taken from the first unit result of that repeat
    per_repeat = []
    for unit_results in self.data.values():
        per_repeat.append(unit_results[0].outputs["unit_mbar_overlap"])
    return per_repeat
def get_replica_transition_statistics(self) -> list[dict[str, npt.NDArray]]:
    """The replica lambda state transition statistics for each repeat.

    Note
    ----
    This is currently only available in cases where a replica exchange
    simulation was run.

    Returns
    -------
    repex_stats : list[dict[str, npt.NDArray]]
      A list of dictionaries containing the following:
        * ``eigenvalues``: The sorted (descending) eigenvalues of the
          lambda state transition matrix
        * ``matrix``: The transition matrix estimate of a replica switching
          from state i to state j.

    Raises
    ------
    ValueError
      If any repeat has no ``replica_exchange_statistics`` output.
    """
    try:
        repex_stats = [
            pus[0].outputs["replica_exchange_statistics"] for pus in self.data.values()
        ]
    except KeyError as err:
        errmsg = "Replica exchange statistics were not found, did you run a repex calculation?"
        # Chain explicitly so the traceback shows the missing key as the
        # cause rather than as an unrelated error raised during handling.
        raise ValueError(errmsg) from err
    return repex_stats
def get_replica_states(self) -> list[npt.NDArray]:
    """
    Returns the timeseries of replica states for each repeat.

    Returns
    -------
    replica_states : List[npt.NDArray]
      List of replica states for each repeat

    Raises
    ------
    ValueError
      If the trajectory or checkpoint file of a repeat does not exist.
    """

    def is_file(filename: str) -> pathlib.Path:
        # Resolve ``filename`` to a Path, failing early if it is missing
        p = pathlib.Path(filename)
        if not p.exists():
            errmsg = f"File could not be found {p}"
            raise ValueError(errmsg)
        return p

    replica_states = []
    for pus in self.data.values():
        nc = is_file(pus[0].outputs["trajectory"])
        # Only the file name of the checkpoint is handed to the reporter
        chk = is_file(pus[0].outputs["checkpoint"]).name
        reporter = multistate.MultiStateReporter(
            storage=nc, checkpoint_storage=chk, open_mode="r"
        )
        try:
            replica_states.append(np.asarray(reporter.read_replica_thermodynamic_states()))
        finally:
            # Always release the file handles, even if reading fails
            reporter.close()
    return replica_states
def equilibration_iterations(self) -> list[float]:
    """
    Returns the number of equilibration iterations for each repeat
    of the calculation.

    Returns
    -------
    equilibration_lengths : list[float]
    """
    # One value per repeat, read from the first unit result of that repeat
    lengths = []
    for unit_results in self.data.values():
        lengths.append(unit_results[0].outputs["equilibration_iterations"])
    return lengths
def production_iterations(self) -> list[float]:
    """
    Returns the number of uncorrelated production samples for each
    repeat of the calculation.

    Returns
    -------
    production_lengths : list[float]
    """
    # One value per repeat, read from the first unit result of that repeat
    lengths = []
    for unit_results in self.data.values():
        lengths.append(unit_results[0].outputs["production_iterations"])
    return lengths
================================================
FILE: src/openfe/protocols/openmm_rfe/hybridtop_protocols.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""
Hybrid Topology Protocols using OpenMM and OpenMMTools in a Perses-like manner.
Acknowledgements
----------------
These Protocols are based on, and leverage components originating from
the Perses toolkit (https://github.com/choderalab/perses).
"""
from __future__ import annotations
import logging
import uuid
import warnings
from collections import defaultdict
from typing import Any, Iterable, Optional, Union
import gufe
import numpy as np
from gufe import (
BaseSolventComponent,
ChemicalSystem,
Component,
ComponentMapping,
LigandAtomMapping,
ProteinComponent,
ProteinMembraneComponent,
SmallMoleculeComponent,
SolventComponent,
settings,
)
from openff.units import unit as offunit
from openfe.due import Doi, due
from ..openmm_utils import (
settings_validation,
system_validation,
)
from .equil_rfe_settings import (
AlchemicalSettings,
IntegratorSettings,
LambdaSettings,
MultiStateOutputSettings,
MultiStateSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSolvationSettings,
RelativeHybridTopologyProtocolSettings,
)
from .hybridtop_protocol_results import RelativeHybridTopologyProtocolResult
from .hybridtop_units import (
HybridTopologyMultiStateAnalysisUnit,
HybridTopologyMultiStateSimulationUnit,
HybridTopologySetupUnit,
)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Register citations for the software this module builds on (Perses,
# OpenMMTools and OpenMM). All three are attached to this module's path.
# NOTE(review): ``cite_module=True`` presumably credits the module as a
# whole rather than a single function - confirm against ``openfe.due``.
due.cite(
    Doi("10.5281/zenodo.1297683"),
    description="Perses",
    path="openfe.protocols.openmm_rfe.hybridtop_protocols",
    cite_module=True,
)
due.cite(
    Doi("10.5281/zenodo.596622"),
    description="OpenMMTools",
    path="openfe.protocols.openmm_rfe.hybridtop_protocols",
    cite_module=True,
)
due.cite(
    Doi("10.1371/journal.pcbi.1005659"),
    description="OpenMM",
    path="openfe.protocols.openmm_rfe.hybridtop_protocols",
    cite_module=True,
)
class RelativeHybridTopologyProtocol(gufe.Protocol):
"""
Relative Free Energy calculations using a Hybrid Topology scheme
using OpenMM and OpenMMTools.
Based on `Perses <https://github.com/choderalab/perses>`_
See Also
--------
:mod:`openfe.protocols`
:class:`openfe.protocols.openmm_rfe.RelativeHybridTopologyProtocolSettings`
:class:`openfe.protocols.openmm_rfe.RelativeHybridTopologyProtocolResult`
:class:`openfe.protocols.openmm_rfe.RelativeHybridTopologyProtocolUnit`
"""
result_cls = RelativeHybridTopologyProtocolResult
_settings_cls = RelativeHybridTopologyProtocolSettings
_settings: RelativeHybridTopologyProtocolSettings
@classmethod
def _default_settings(cls):
    """A set of initial settings for creating this Protocol

    These settings are intended as a suitable starting point for creating
    an instance of this protocol. It is recommended, however that care is
    taken to inspect and customize these before performing a Protocol.

    Returns
    -------
    Settings
      a set of default settings
    """
    # Settings that need explicit values are built first ...
    thermo = settings.ThermoSettings(
        temperature=298.15 * offunit.kelvin,
        pressure=1 * offunit.bar,
    )
    sampler = MultiStateSimulationSettings(
        equilibration_length=1.0 * offunit.nanosecond,
        production_length=5.0 * offunit.nanosecond,
    )
    alchemical = AlchemicalSettings(softcore_LJ="gapsys")

    # ... everything else relies on the defaults of its settings class.
    return RelativeHybridTopologyProtocolSettings(
        protocol_repeats=3,
        forcefield_settings=settings.OpenMMSystemGeneratorFFSettings(),
        thermo_settings=thermo,
        partial_charge_settings=OpenFFPartialChargeSettings(),
        solvation_settings=OpenMMSolvationSettings(),
        alchemical_settings=alchemical,
        lambda_settings=LambdaSettings(),
        simulation_settings=sampler,
        engine_settings=OpenMMEngineSettings(),
        integrator_settings=IntegratorSettings(),
        output_settings=MultiStateOutputSettings(),
    )
@classmethod
def _adaptive_settings(
    cls,
    stateA: ChemicalSystem,
    stateB: ChemicalSystem,
    mapping: gufe.LigandAtomMapping | list[gufe.LigandAtomMapping],
    initial_settings: None | RelativeHybridTopologyProtocolSettings = None,
) -> RelativeHybridTopologyProtocolSettings:
    """
    Get the recommended OpenFE settings for this protocol based on the
    input states involved in the transformation.

    These are intended as a suitable starting point for creating an
    instance of this protocol, which can be further customized before
    performing a Protocol.

    Parameters
    ----------
    stateA : ChemicalSystem
      The initial state of the transformation.
    stateB : ChemicalSystem
      The final state of the transformation. Not inspected by the current
      implementation.
    mapping : LigandAtomMapping | list[LigandAtomMapping]
      The mapping(s) between transforming components in stateA and stateB.
      If a list is given, only its first entry is used.
    initial_settings : None | RelativeHybridTopologyProtocolSettings, optional
      Initial settings to base the adaptive settings on. If None, default
      settings are used.

    Returns
    -------
    RelativeHybridTopologyProtocolSettings
      The recommended settings for this protocol based on the input states.

    Notes
    -----
    - If the transformation involves a change in net charge, the explicit
      charge correction is enabled and a more expensive protocol with
      22 lambda windows (and replicas) and 20 ns production length is used.
    - If stateA contains a ProteinComponent, the solvation padding is set
      to 1 nm.
    - If stateA contains a ProteinMembraneComponent, the barostat is set to
      "MonteCarloMembraneBarostat".
    - If initial_settings is provided, the adaptive settings are based on a
      deep copy of these settings.
    """
    # Work on a copy of the user supplied settings (needed by the CLI so
    # the caller's object is left untouched), else start from the defaults.
    if initial_settings is None:
        protocol_settings = cls.default_settings()
    else:
        protocol_settings = initial_settings.model_copy(deep=True)

    ligand_mapping = mapping[0] if isinstance(mapping, list) else mapping

    if ligand_mapping.get_alchemical_charge_difference() != 0:
        # Recommended charge changing settings, taken from the industry
        # benchmarking; the fast settings are not validated for this case.
        logger.info(
            "Charge changing transformation between ligands "
            f"{ligand_mapping.componentA.name} and {ligand_mapping.componentB.name}. "
            "A more expensive protocol with 22 lambda windows, sampled "
            "for 20 ns each, will be used here."
        )
        n_windows = 22
        protocol_settings.alchemical_settings.explicit_charge_correction = True
        protocol_settings.simulation_settings.production_length = 20 * offunit.nanosecond
        protocol_settings.simulation_settings.n_replicas = n_windows
        protocol_settings.lambda_settings.lambda_windows = n_windows

    # Solvation padding depends on the presence of a protein
    if stateA.contains(ProteinComponent):
        protocol_settings.solvation_settings.solvent_padding = 1 * offunit.nanometer

    # Barostat choice depends on the presence of a membrane
    if stateA.contains(ProteinMembraneComponent):
        protocol_settings.integrator_settings.barostat = "MonteCarloMembraneBarostat"

    return protocol_settings
@staticmethod
def _validate_endstates(
stateA: ChemicalSystem,
stateB: ChemicalSystem,
) -> None:
"""
Validates the end states for the RFE protocol.
Parameters
----------
stateA : ChemicalSystem
The chemical system of end state A.
stateB : ChemicalSystem
The chemical system of end state B.
Raises
------
ValueError
* If either state contains more than one unique Component.
* If unique components are not SmallMoleculeComponents.
"""
# Get the difference in Components between each state
diff = stateA.component_diff(stateB)
for i, entry in enumerate(diff):
state_label = "A" if i == 0 else "B"
# Check that there is only one unique Component in each state
if len(entry) != 1:
errmsg = (
"Only one alchemical component is allowed per end state. "
f"Found {len(entry)} in state {state_label}."
)
raise ValueError(errmsg)
# Check that the unique Component is a SmallMoleculeComponent
if not isinstance(entry[0], SmallMoleculeComponent):
errmsg = (
f"Alchemical component in state {state_label} is of type "
f"{type(entry[0])}, but only SmallMoleculeComponents "
"transformations are currently supported."
)
raise ValueError(errmsg)
@staticmethod
def _validate_mapping(
    mapping: Optional[Union[ComponentMapping, list[ComponentMapping]]],
    alchemical_components: dict[str, list[Component]],
) -> None:
    """
    Validates that the provided mapping(s) are suitable for the RFE protocol.

    Parameters
    ----------
    mapping : Optional[Union[ComponentMapping, list[ComponentMapping]]]
      all mappings between transforming components.
    alchemical_components : dict[str, list[Component]]
      Dictionary containing the alchemical components for
      states A and B, under the keys ``stateA`` and ``stateB``.

    Raises
    ------
    ValueError
      * If mapping is None or does not hold exactly one mapping
        (an empty list is rejected as well).
      * If the mapping components are not in the alchemical components.

    Warns
    -----
    UserWarning
      * Mappings which involve element changes in core atoms
    """
    # if a single mapping is provided, convert to list
    if isinstance(mapping, ComponentMapping):
        mapping = [mapping]

    # For now we only support a single mapping. An empty list must be
    # rejected here too, otherwise callers indexing ``mapping[0]`` later
    # would fail with an unhelpful IndexError.
    if mapping is None or len(mapping) != 1:
        errmsg = "A single LigandAtomMapping is expected for this Protocol"
        raise ValueError(errmsg)

    # check that the mapping components are in the alchemical components
    for m in mapping:
        for state in ["A", "B"]:
            comp = getattr(m, f"component{state}")
            if comp not in alchemical_components[f"state{state}"]:
                raise ValueError(
                    f"Mapping component{state} {comp} not "
                    f"in alchemical components of state{state}"
                )

    # TODO: remove - this is now the default behaviour?
    # Check for element changes in mappings
    for m in mapping:
        molA = m.componentA.to_rdkit()
        molB = m.componentB.to_rdkit()
        for i, j in m.componentA_to_componentB.items():
            atomA = molA.GetAtomWithIdx(i)
            atomB = molB.GetAtomWithIdx(j)
            if atomA.GetAtomicNum() != atomB.GetAtomicNum():
                wmsg = (
                    f"Element change in mapping between atoms "
                    f"Ligand A: {i} (element {atomA.GetAtomicNum()}) and "
                    f"Ligand B: {j} (element {atomB.GetAtomicNum()})\n"
                    "No mass scaling is attempted in the hybrid topology, "
                    "the average mass of the two atoms will be used in the "
                    "simulation"
                )
                # Emitted both to the log and as a Python warning
                logger.warning(wmsg)
                warnings.warn(wmsg)
@staticmethod
def _validate_smcs(
    stateA: ChemicalSystem,
    stateB: ChemicalSystem,
) -> None:
    """
    Validates the SmallMoleculeComponents.

    Parameters
    ----------
    stateA : ChemicalSystem
      The chemical system of end state A.
    stateB : ChemicalSystem
      The chemical system of end state B.

    Raises
    ------
    ValueError
      * If there are isomorphic SmallMoleculeComponents with
        different charges within a given ChemicalSystem.
    """

    def _equal_charges(moli, molj) -> bool:
        charges_i = moli.partial_charges
        charges_j = molj.partial_charges
        # Base case, both molecules don't have charges
        if charges_i is None and charges_j is None:
            return True
        # If either is None but not the other
        if charges_i is None or charges_j is None:
            return False
        # Check if the charges are close to each other
        return np.allclose(charges_i, charges_j)

    clashes = []

    # Clashes are only looked for within a state, never across states
    for state in (stateA, stateB):
        smcs = state.get_components_of_type(SmallMoleculeComponent)
        offmols = [m.to_openff() for m in smcs]

        # Compare every unordered pair once; a molecule is never compared
        # with itself.
        clashing_idxs = set()
        for i, moli in enumerate(offmols):
            for j in range(i + 1, len(offmols)):
                molj = offmols[j]
                if moli.is_isomorphic_with(molj) and not _equal_charges(moli, molj):
                    clashing_idxs.update((i, j))

        # Report each affected component a single time, in input order
        for idx in sorted(clashing_idxs):
            if smcs[idx] not in clashes:
                clashes.append(smcs[idx])

    if len(clashes) > 0:
        errmsg = (
            "Found SmallMoleculeComponents that are isomorphic "
            "but with different charges, this is not currently allowed. "
            f"Affected components: {clashes}"
        )
        raise ValueError(errmsg)
@staticmethod
def _validate_charge_difference(
mapping: LigandAtomMapping,
nonbonded_method: str,
explicit_charge_correction: bool,
solvent_component: SolventComponent | None,
):
"""
Validates the net charge difference between the two states.
Parameters
----------
mapping : dict[str, ComponentMapping]
Dictionary of mappings between transforming components.
nonbonded_method : str
The OpenMM nonbonded method used for the simulation.
explicit_charge_correction : bool
Whether or not to use an explicit charge correction.
solvent_component : openfe.SolventComponent | None
The SolventComponent of the simulation.
Raises
------
ValueError
* If an explicit charge correction is attempted and the
nonbonded method is not PME.
* If the absolute charge difference is greater than one
and an explicit charge correction is attempted.
* If an explicit charge correction is attempted and there is no
solvent present.
UserWarning
* If there is any charge difference.
"""
difference = mapping.get_alchemical_charge_difference()
if abs(difference) == 0:
return
if not explicit_charge_correction:
wmsg = (
f"A charge difference of {difference} is observed "
"between the end states. No charge correction has "
"been requested, please account for this in your "
"final results."
)
logger.warning(wmsg)
warnings.warn(wmsg)
return
if solvent_component is None:
errmsg = "Cannot use explicit charge correction without solvent"
raise ValueError(errmsg)
# We implicitly check earlier that we have to have pme for a solvated
# system, so we only need to check the nonbonded method here
if nonbonded_method.lower() != "pme":
errmsg = "Explicit charge correction when not using PME is not currently supported."
raise ValueError(errmsg)
if abs(difference) > 1:
errmsg = (
f"A charge difference of {difference} is observed "
"between the end states and an explicit charge "
"correction has been requested. Unfortunately "
"only absolute differences of 1 are supported."
)
raise ValueError(errmsg)
ion = {-1: solvent_component.positive_ion, 1: solvent_component.negative_ion}[difference]
wmsg = (
f"A charge difference of {difference} is observed "
"between the end states. This will be addressed by "
f"transforming a water into a {ion} ion"
)
logger.info(wmsg)
@staticmethod
def _validate_simulation_settings(
    simulation_settings: MultiStateSimulationSettings,
    integrator_settings: IntegratorSettings,
    output_settings: MultiStateOutputSettings,
):
    """
    Validate various simulation settings, including but not limited to
    timestep conversions, and output file write frequencies.

    The return values of the conversion helpers are discarded; they are
    only called so that they can reject inconsistent settings.

    Parameters
    ----------
    simulation_settings : MultiStateSimulationSettings
      The sampler simulation settings.
    integrator_settings : IntegratorSettings
      Settings defining the behaviour of the integrator.
    output_settings : MultiStateOutputSettings
      Settings defining the simulation file writing behaviour.

    Raises
    ------
    ValueError
      Presumably raised by the ``settings_validation`` helpers when the
      settings are inconsistent - TODO confirm against those helpers.
    """
    time_per_iteration = simulation_settings.time_per_iteration

    steps_per_iteration = settings_validation.convert_steps_per_iteration(
        simulation_settings=simulation_settings,
        integrator_settings=integrator_settings,
    )

    # Equilibration first, then production
    phase_lengths = (
        simulation_settings.equilibration_length,
        simulation_settings.production_length,
    )
    for phase_length in phase_lengths:
        settings_validation.get_simsteps(
            sim_length=phase_length,
            timestep=integrator_settings.timestep,
            mc_steps=steps_per_iteration,
        )

    settings_validation.convert_checkpoint_interval_to_iterations(
        checkpoint_interval=output_settings.checkpoint_interval,
        time_per_iteration=time_per_iteration,
    )

    # Optional write frequencies are checked against the iteration time,
    # positions before velocities.
    write_frequencies = (
        (
            output_settings.positions_write_frequency,
            "output settings' positions_write_frequency",
        ),
        (
            output_settings.velocities_write_frequency,
            "output settings' velocities_write_frequency",
        ),
    )
    for frequency, frequency_name in write_frequencies:
        if frequency is None:
            continue
        settings_validation.divmod_time_and_check(
            numerator=frequency,
            denominator=time_per_iteration,
            numerator_name=frequency_name,
            denominator_name="sampler settings' time_per_iteration",
        )

    settings_validation.convert_real_time_analysis_iterations(
        simulation_settings=simulation_settings,
    )
def _validate(
    self,
    stateA: ChemicalSystem,
    stateB: ChemicalSystem,
    mapping: gufe.ComponentMapping | list[gufe.ComponentMapping] | None,
    extends: gufe.ProtocolDAGResult | None = None,
) -> None:
    """
    Validate the inputs and the settings of this Protocol.

    Parameters
    ----------
    stateA : ChemicalSystem
      The chemical system of end state A.
    stateB : ChemicalSystem
      The chemical system of end state B.
    mapping : ComponentMapping | list[ComponentMapping] | None
      The mapping(s) between the transforming components.
    extends : ProtocolDAGResult | None
      A previous result to extend; extending is not supported.

    Raises
    ------
    ValueError
      * If ``extends`` is given.
      * If more than one BaseSolventComponent is found in stateA.
      * If the number of replicas differs from the number of lambda windows.
      * Further errors may propagate from the individual validation helpers.
    """
    # Check we're not trying to extend. This technically should be a
    # NotImplementedError, but gufe.Protocol.validate calls `_validate`
    # wrapped around an except for NotImplementedError, so we can't raise
    # it here.
    if extends:
        raise ValueError("Can't extend simulations yet")

    # End states
    system_validation.validate_chemical_system(stateA)
    system_validation.validate_chemical_system(stateB)
    self._validate_endstates(stateA, stateB)

    # Mapping
    alchemical_components = system_validation.get_alchemical_components(stateA, stateB)
    self._validate_mapping(mapping, alchemical_components)

    # Small molecule components
    self._validate_smcs(stateA, stateB)

    protocol_settings = self.settings

    # Solvent component
    nonbonded_method = protocol_settings.forcefield_settings.nonbonded_method
    system_validation.validate_solvent(stateA, nonbonded_method)

    # BaseSolventComponents
    if len(stateA.get_components_of_type(BaseSolventComponent)) > 1:
        raise ValueError("Multiple BaseSolventComponents found, only one is supported.")

    # Solvation settings
    settings_validation.validate_openmm_solvation_settings(protocol_settings.solvation_settings)

    # Protein component, and the barostat used in combination with it
    system_validation.validate_protein(stateA)
    system_validation.validate_barostat(stateA, protocol_settings.integrator_settings.barostat)

    # Charge difference
    # Note: validation depends on the mapping & solvent component checks
    solvent_component = (
        stateA.get_components_of_type(SolventComponent)[0]
        if stateA.contains(SolventComponent)
        else None
    )
    ligand_mapping = mapping[0] if isinstance(mapping, list) else mapping
    self._validate_charge_difference(
        mapping=ligand_mapping,
        nonbonded_method=nonbonded_method,
        explicit_charge_correction=protocol_settings.alchemical_settings.explicit_charge_correction,
        solvent_component=solvent_component,
    )

    # Integrator things
    settings_validation.validate_timestep(
        protocol_settings.forcefield_settings.hydrogen_mass,
        protocol_settings.integrator_settings.timestep,
    )

    # Simulation & output settings
    self._validate_simulation_settings(
        protocol_settings.simulation_settings,
        protocol_settings.integrator_settings,
        protocol_settings.output_settings,
    )

    # Alchemical settings
    # PR #125 temporarily pin lambda schedule spacing to n_replicas
    n_replicas = protocol_settings.simulation_settings.n_replicas
    n_windows = protocol_settings.lambda_settings.lambda_windows
    if n_replicas != n_windows:
        raise ValueError(
            "Number of replicas in ``simulation_settings``: "
            f"{n_replicas} must equal "
            "the number of lambda windows in lambda_settings: "
            f"{n_windows}."
        )
def _create(
    self,
    stateA: ChemicalSystem,
    stateB: ChemicalSystem,
    mapping: Optional[Union[gufe.ComponentMapping, list[gufe.ComponentMapping]]],
    extends: Optional[gufe.ProtocolDAGResult] = None,
) -> list[gufe.ProtocolUnit]:
    """
    Create the ProtocolUnits of this Protocol.

    For every protocol repeat a setup, a simulation and an analysis unit
    are created. The simulation unit is given the setup unit, and the
    analysis unit is given both the setup and the simulation unit.

    Parameters
    ----------
    stateA : ChemicalSystem
      The chemical system of end state A.
    stateB : ChemicalSystem
      The chemical system of end state B.
    mapping : Optional[Union[ComponentMapping, list[ComponentMapping]]]
      The mapping(s) between the transforming components; if a list is
      given, its first entry is used.
    extends : Optional[ProtocolDAGResult]
      Forwarded to ``validate``.

    Returns
    -------
    list[gufe.ProtocolUnit]
      All setup units, followed by all simulation units, followed by all
      analysis units.
    """
    # validate inputs
    self.validate(stateA=stateA, stateB=stateB, mapping=mapping, extends=extends)

    # get alchemical components and mapping
    alchem_comps = system_validation.get_alchemical_components(stateA, stateB)
    ligandmapping = mapping[0] if isinstance(mapping, list) else mapping

    # human readable names of the alchemical components
    Anames = ",".join(c.name for c in alchem_comps["stateA"])
    Bnames = ",".join(c.name for c in alchem_comps["stateB"])

    # DAG dependency is setup -> simulation -> analysis
    #                      |--------------------->
    setup_units = []
    simulation_units = []
    analysis_units = []

    for repeat_idx in range(self.settings.protocol_repeats):
        # a fresh identifier ties the three units of a repeat together
        repeat_id = int(uuid.uuid4())
        label = f"{Anames} to {Bnames} repeat {repeat_idx} generation 0"

        setup = HybridTopologySetupUnit(
            protocol=self,
            stateA=stateA,
            stateB=stateB,
            ligandmapping=ligandmapping,
            alchemical_components=alchem_comps,
            generation=0,
            repeat_id=repeat_id,
            name=f"HybridTopology Setup: {label}",
        )
        setup_units.append(setup)

        simulation = HybridTopologyMultiStateSimulationUnit(
            protocol=self,
            setup_results=setup,
            generation=0,
            repeat_id=repeat_id,
            name=f"HybridTopology Simulation: {label}",
        )
        simulation_units.append(simulation)

        analysis = HybridTopologyMultiStateAnalysisUnit(
            protocol=self,
            setup_results=setup,
            simulation_results=simulation,
            generation=0,
            repeat_id=repeat_id,
            name=f"HybridTopology Analysis: {label}",
        )
        analysis_units.append(analysis)

    return setup_units + simulation_units + analysis_units
def _gather(self, protocol_dag_results: Iterable[gufe.ProtocolDAGResult]) -> dict[str, Any]:
# result units will have a repeat_id and generations within this repeat_id
# first group according to repeat_id
unsorted_repeats = defaultdict(list)
for d in protocol_dag_results:
pu: gufe.ProtocolUnitResult
for pu in d.protocol_unit_results:
# We only need the analysis units that are ok
if ("Analysis" not in pu.name) or (not pu.ok()):
continue
unsorted_repeats[pu.outputs["repeat_id"]].append(pu)
# then sort by generation within each repeat_id list
repeats: dict[str, list[gufe.ProtocolUnitResult]] = {}
for k, v in unsorted_repeats.items():
repeats[str(k)] = sorted(v, key=lambda x: x.outputs["generation"])
# returns a dict of repeat_id: sorted list of ProtocolUnitResult
return repeats
================================================
FILE: src/openfe/protocols/openmm_rfe/hybridtop_units.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""
ProtocolUnits for Hybrid Topology methods using OpenMM and OpenMMTools in a
Perses-like manner.
Acknowledgements
----------------
These ProtocolUnits are based on, and leverage components originating from
the Perses toolkit (https://github.com/choderalab/perses).
"""
import logging
import os
import pathlib
import subprocess
from itertools import chain
from typing import Any
import gufe
import matplotlib.pyplot as plt
import mdtraj as mdt
import numpy as np
import numpy.typing as npt
import openmm
import openmmtools
from gufe import (
ChemicalSystem,
Component,
LigandAtomMapping,
ProteinComponent,
SmallMoleculeComponent,
SolvatedPDBComponent,
SolventComponent,
)
from gufe.protocols.errors import ProtocolUnitExecutionError
from gufe.settings import (
SettingsBaseModel,
ThermoSettings,
)
from openff.toolkit.topology import Molecule as OFFMolecule
from openff.units import Quantity
from openff.units import unit as offunit
from openff.units.openmm import ensure_quantity, from_openmm, to_openmm
from openmmforcefields.generators import SystemGenerator
from openmmtools import multistate
import openfe
from openfe.protocols.openmm_utils.omm_settings import (
BasePartialChargeSettings,
)
from ...analysis import plotting
from ...utils import log_system_probe, without_oechem_backend
from ..openmm_utils import (
charge_generation,
multistate_analysis,
omm_compute,
settings_validation,
system_creation,
system_validation,
)
from ..openmm_utils.serialization import (
deserialize,
serialize,
)
from . import _rfe_utils
from ._rfe_utils.relative import HybridTopologyFactory
from .equil_rfe_settings import (
AlchemicalSettings,
IntegratorSettings,
LambdaSettings,
MultiStateOutputSettings,
MultiStateSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSolvationSettings,
RelativeHybridTopologyProtocolSettings,
)
logger = logging.getLogger(__name__)
class HybridTopologyUnitMixin:
    """
    Helpers shared by the Hybrid Topology ProtocolUnits.

    Provides base path / verbosity setup, extraction of the Protocol
    settings into a dictionary, and a check that the library versions
    recorded by a setup unit match the current environment.
    """

    def _prepare(
        self,
        verbose: bool,
        scratch_basepath: pathlib.Path | None,
        shared_basepath: pathlib.Path | None,
    ):
        """
        Set basepaths and store the verbosity flag.

        Parameters
        ----------
        verbose : bool
          Verbose output of the simulation progress. Output is provided at the
          INFO level logging.
        scratch_basepath : pathlib.Path | None
          Optional scratch base path to write scratch files to.
        shared_basepath : pathlib.Path | None
          Optional shared base path to write shared files to.

        Notes
        -----
        A base path of ``None`` is replaced by ``pathlib.Path(".")``.
        """
        self.verbose = verbose

        # Fall back to the current directory for any missing base path
        cwd = pathlib.Path(".")
        self.scratch_basepath = cwd if scratch_basepath is None else scratch_basepath
        self.shared_basepath = cwd if shared_basepath is None else shared_basepath

    @staticmethod
    def _get_settings(
        settings: RelativeHybridTopologyProtocolSettings,
    ) -> dict[str, SettingsBaseModel]:
        """
        Get a dictionary of Protocol settings.

        Parameters
        ----------
        settings : RelativeHybridTopologyProtocolSettings
          The Protocol settings to extract the entries from.

        Returns
        -------
        protocol_settings : dict[str, SettingsBaseModel]

        Notes
        -----
        We return a dict so that we can duck type behaviour between phases.
        For example subclasses may contain both `solvent` and `complex`
        settings, using this approach we can extract the relevant entry
        to the same key and pass it to other methods in a seamless manner.
        """
        # Note: the partial charge settings are exposed as ``charge_settings``
        protocol_settings: dict[str, SettingsBaseModel] = {
            "forcefield_settings": settings.forcefield_settings,
            "thermo_settings": settings.thermo_settings,
            "alchemical_settings": settings.alchemical_settings,
            "lambda_settings": settings.lambda_settings,
            "charge_settings": settings.partial_charge_settings,
            "solvation_settings": settings.solvation_settings,
            "simulation_settings": settings.simulation_settings,
            "output_settings": settings.output_settings,
            "integrator_settings": settings.integrator_settings,
            "engine_settings": settings.engine_settings,
        }
        return protocol_settings

    @staticmethod
    def _verify_execution_environment(
        setup_outputs: dict[str, Any],
    ) -> None:
        """
        Check that the Python environment hasn't changed based on the
        relevant Python library versions stored in the setup outputs.

        Parameters
        ----------
        setup_outputs : dict[str, Any]
          Outputs of the setup unit; the ``gufe_version``, ``openfe_version``
          and ``openmm_version`` entries are compared to the current versions.

        Raises
        ------
        ProtocolUnitExecutionError
          If a recorded version differs from the current one, or if a
          version entry needed for the comparison is missing.
        """
        # Only the key lookups are guarded; the short-circuiting ``or``
        # keeps the lookup order of the comparison unchanged.
        try:
            environment_changed = (
                (gufe.__version__ != setup_outputs["gufe_version"])
                or (openfe.__version__ != setup_outputs["openfe_version"])
                or (openmm.__version__ != setup_outputs["openmm_version"])
            )
        except KeyError as err:
            errmsg = "Missing environment information from setup outputs."
            # Chain the KeyError so the missing key is visible in the traceback
            raise ProtocolUnitExecutionError(errmsg) from err

        if environment_changed:
            errmsg = "Python environment has changed, cannot continue Protocol execution."
            raise ProtocolUnitExecutionError(errmsg)
class HybridTopologySetupUnit(gufe.ProtocolUnit, HybridTopologyUnitMixin):
"""
Setup unit for Hybrid Topology Protocol transformations.
"""
@staticmethod
def _get_components(
    stateA: ChemicalSystem, stateB: ChemicalSystem
) -> tuple[SolventComponent, ProteinComponent, dict[SmallMoleculeComponent, OFFMolecule]]:
    """
    Extract the solvent, protein and small molecule components.

    Parameters
    ----------
    stateA : ChemicalSystem
      ChemicalSystem defining the state A components.
    stateB : ChemicalSystem
      ChemicalSystem defining the state B components.

    Returns
    -------
    solv_comp : SolventComponent
      The solvent component, taken from state A. If the protein component
      is a SolvatedPDBComponent, the protein component is returned here.
    protein_comp : ProteinComponent
      The protein component, taken from state A.
    small_mols : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
      The small molecule components of both states, each paired
      with its OpenFF Molecule.
    """
    solvent_comp, protein_comp, ligands_a = system_validation.get_components(stateA)
    _, _, ligands_b = system_validation.get_components(stateB)

    # Union of both states' small molecules, converted once each
    unique_smcs = set(ligands_a).union(set(ligands_b))
    small_mols = {smc: smc.to_openff() for smc in unique_smcs}

    # A SolvatedPDBComponent also takes the role of the solvent component
    if isinstance(protein_comp, SolvatedPDBComponent):
        solvent_comp = protein_comp

    return solvent_comp, protein_comp, small_mols
@staticmethod
def _assign_partial_charges(
charge_settings: OpenFFPartialChargeSettings,
small_mols: dict[SmallMoleculeComponent, OFFMolecule],
) -> None:
"""
Assign partial charges to the OpenFF Molecules associated with all
the SmallMoleculeComponents in the transformation.
Parameters
----------
charge_settings : OpenFFPartialChargeSettings
Settings for controlling how the partial charges are assigned.
small_mols : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
Dictionary of OpenFF Molecules to add, keyed by
their associated SmallMoleculeComponent.
"""
for smc, mol in small_mols.items():
charge_generation.assign_offmol_partial_charges(
offmol=mol,
overwrite=False,
method=charge_settings.partial_charge_method,
toolkit_backend=charge_settings.off_toolkit_backend,
generate_n_conformers=charge_settings.number_of_conformers,
nagl_model=charge_settings.nagl_model,
)
@staticmethod
def _get_system_generator(
    settings: dict[str, SettingsBaseModel],
    solvent_component: SolventComponent | None,
    openff_molecules: list[OFFMolecule] | None,
    ffcache: pathlib.Path | None,
) -> SystemGenerator:
    """
    Build an OpenMM SystemGenerator and register molecule templates.

    Parameters
    ----------
    settings : dict[str, SettingsBaseModel]
      A dictionary of protocol settings; the ``forcefield_settings``,
      ``integrator_settings`` and ``thermo_settings`` entries are used.
    solvent_component : SolventComponent | None
      The solvent component of the system, if any. Only its presence
      is used here.
    openff_molecules : list[openff.toolkit.Molecule] | None
      A list of openff molecules to generate templates for, if any.
    ffcache : pathlib.Path | None
      Path to the force field parameter cache.

    Returns
    -------
    system_generator : openmmforcefields.generators.SystemGenerator
      The SystemGenerator for the protocol.
    """
    generator = system_creation.get_system_generator(
        forcefield_settings=settings["forcefield_settings"],
        integrator_settings=settings["integrator_settings"],
        thermo_settings=settings["thermo_settings"],
        cache=ffcache,
        has_solvent=solvent_component is not None,
    )

    # Handle openff Molecule templates
    # TODO: revisit this once the SystemGenerator update happens
    # and we start loading the whole protein into OpenFF Topologies
    if openff_molecules is not None:
        # Register only unique molecules to avoid template clashes
        unique_molecules = list(set(openff_molecules))
        generator.add_molecules(unique_molecules)

    return generator
@staticmethod
def _create_stateA_system(
    small_mols: dict[SmallMoleculeComponent, OFFMolecule],
    protein_component: ProteinComponent | None,
    solvent_component: SolventComponent | None,
    system_generator: SystemGenerator,
    solvation_settings: OpenMMSolvationSettings,
) -> tuple[
    openmm.System, openmm.app.Topology, openmm.unit.Quantity, dict[Component, npt.NDArray]
]:
    """
    Build the OpenMM System, Topology and positions for state A.

    Parameters
    ----------
    small_mols : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
      The small molecules to include in the System, keyed by component.
    protein_component : ProteinComponent | None
      Optionally, the protein component to include in the System.
    solvent_component : SolventComponent | None
      Optionally, the solvent component to include in the System.
    system_generator : SystemGenerator
      The SystemGenerator object to use to construct the System.
    solvation_settings : OpenMMSolvationSettings
      Settings defining how to build the System.

    Returns
    -------
    system : openmm.System
      The System that defines state A.
    topology : openmm.app.Topology
      The Topology defining the returned System.
    positions : openmm.unit.Quantity
      The positions of the particles in the System.
    comp_residues : dict[Component, npt.NDArray]
      A dictionary defining which residues in the System
      belong to which ChemicalSystem Component.
    """
    state_modeller, component_resids = system_creation.get_omm_modeller(
        protein_comp=protein_component,
        solvent_comp=solvent_component,
        small_mols=small_mols,
        omm_forcefield=system_generator.forcefield,
        solvent_settings=solvation_settings,
    )

    state_topology = state_modeller.getTopology()

    # Note: roundtrip positions to remove vec3 issues
    raw_positions = state_modeller.getPositions()
    state_positions = to_openmm(from_openmm(raw_positions))

    offmols = list(small_mols.values())
    state_system = system_generator.create_system(
        state_modeller.topology,
        molecules=offmols,
    )

    return state_system, state_topology, state_positions, component_resids
@staticmethod
def _create_stateB_system(
    small_mols: dict[SmallMoleculeComponent, OFFMolecule],
    mapping: LigandAtomMapping,
    stateA_topology: openmm.app.Topology,
    exclude_resids: npt.NDArray,
    system_generator: SystemGenerator,
) -> tuple[openmm.System, openmm.app.Topology, npt.NDArray]:
    """
    Build the state B System on top of the state A Topology.

    The state B Topology is obtained by combining the state A Topology
    (with ``exclude_resids`` passed as the residues to exclude) with the
    Topology of the mapping's ``componentB`` molecule.

    Parameters
    ----------
    small_mols : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
      Dictionary of OpenFF Molecules keyed by SmallMoleculeComponent
      to be present in system B.
    mapping : LigandAtomMapping
      LigandAtomMapping defining the correspondence between state A
      and B's alchemical ligand.
    stateA_topology : openmm.app.Topology
      The OpenMM topology for state A.
    exclude_resids : npt.NDArray
      The residues to exclude from state A when building state B.
    system_generator : SystemGenerator
      The SystemGenerator to use to build System B.

    Returns
    -------
    system : openmm.System
      The state B System.
    topology : openmm.app.Topology
      The OpenMM Topology associated with the state B System.
    alchem_resids : npt.NDArray
      The residue indices of the state B alchemical species.
    """
    # OpenMM Topology of the incoming (state B) ligand
    ligand_b_topology = small_mols[mapping.componentB].to_topology().to_openmm()

    state_topology, alchemical_resids = _rfe_utils.topologyhelpers.combined_topology(
        topology1=stateA_topology,
        topology2=ligand_b_topology,
        exclude_resids=exclude_resids,
    )

    offmols = list(small_mols.values())
    state_system = system_generator.create_system(
        state_topology,
        molecules=offmols,
    )

    return state_system, state_topology, alchemical_resids
@staticmethod
def _handle_net_charge(
stateA_topology: openmm.app.Topology,
stateA_positions: openmm.unit.Quantity,
stateB_topology: openmm.app.Topology,
stateB_system: openmm.System,
charge_difference: int,
system_mappings: dict[str, dict[int, int]],
distance_cutoff: Quantity,
solvent_component: SolventComponent | None,
) -> None:
"""
Handle system net charge by adding an alchemical water.
Parameters
----------
stateA_topology : openmm.app.Topology
stateA_positions : openmm.unit.Quantity
stateB_topology : openmm.app.Topology
stateB_system : openmm.System
charge_difference : int
system_mappings : dict[str, dict[int, int]]
distance_cutoff : Quantity
solvent_component : SolventComponent | None
"""
# Base case, return if no net charge
if charge_difference == 0:
return
# Get the residue ids for waters to turn alchemical
alchem_water_resids = _rfe_utils.topologyhelpers.get_alchemical_waters(
topology=stateA_topology,
positions=stateA_positions,
charge_difference=charge_difference,
distance_cutoff=distance_cutoff,
)
# In-place modify state B alchemical waters to ions
_rfe_utils.topologyhelpers.handle_alchemical_waters(
water_resids=alchem_water_resids,
topology=stateB_topology,
system=stateB_system,
system_mapping=system_mappings,
charge_difference=charge_difference,
solvent_component=solvent_component,
)
def _get_omm_objects(
    self,
    stateA: ChemicalSystem,
    stateB: ChemicalSystem,
    mapping: LigandAtomMapping,
    settings: dict[str, SettingsBaseModel],
    protein_component: ProteinComponent | None,
    solvent_component: SolventComponent | None,
    small_mols: dict[SmallMoleculeComponent, OFFMolecule],
) -> tuple[
    openmm.System,
    openmm.app.Topology,
    openmm.unit.Quantity,
    openmm.System,
    openmm.app.Topology,
    openmm.unit.Quantity,
    dict[str, dict[int, int]],
]:
    """
    Get OpenMM objects for both end states A and B.

    A separate SystemGenerator is created for each end state, state A is
    built first and state B is then derived from the state A topology.

    Parameters
    ----------
    stateA : ChemicalSystem
      ChemicalSystem defining end state A.
    stateB : ChemicalSystem
      ChemicalSystem defining end state B.
    mapping : LigandAtomMapping
      The mapping for alchemical components between state A and B.
    settings : dict[str, SettingsBaseModel]
      Settings for the transformation.
    protein_component : ProteinComponent | None
      The common ProteinComponent between the end states, if there is one.
    solvent_component : SolventComponent | None
      The common SolventComponent between the end states, if there is one.
    small_mols : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
      The small molecules for both end states.

    Returns
    -------
    stateA_system : openmm.System
      OpenMM System for state A.
    stateA_topology : openmm.app.Topology
      OpenMM Topology for the state A System.
    stateA_positions : openmm.unit.Quantity
      Positions of particles for state A System.
    stateB_system : openmm.System
      OpenMM System for state B.
    stateB_topology : openmm.app.Topology
      OpenMM Topology for the state B System.
    stateB_positions : openmm.unit.Quantity
      Positions of particles for state B System.
    system_mapping : dict[str, dict[int, int]]
      Dictionary of mappings defining the correspondence between
      the two state Systems.
    """
    if self.verbose:
        self.logger.info("Parameterizing systems")

    # Keep only the small molecules that are contained in a given state
    def _filter_small_mols(smols, state):
        return {smc: offmol for smc, offmol in smols.items() if state.contains(smc)}

    # Per-state inputs; a "generator" entry is added to each state below
    states_inputs = {
        "A": {"state": stateA, "mols": _filter_small_mols(small_mols, stateA)},
        "B": {"state": stateB, "mols": _filter_small_mols(small_mols, stateB)},
    }

    # Everything involving systemgenerator handling has a risk of
    # oechem <-> rdkit smiles conversion clashes, cautiously ban it.
    with without_oechem_backend():
        # Get the system generators with all the templates registered
        for state in ["A", "B"]:
            # Each state gets its own cache file: the configured cache
            # name is prefixed with the state label and placed under
            # the shared base path.
            ffcache = settings["output_settings"].forcefield_cache
            if ffcache is not None:
                ffcache = self.shared_basepath / (f"{state}_" + ffcache)

            states_inputs[state]["generator"] = self._get_system_generator(
                settings=settings,
                solvent_component=solvent_component,
                openff_molecules=list(states_inputs[state]["mols"].values()),
                ffcache=ffcache,
            )

        # State A is built from scratch from its components
        (stateA_system, stateA_topology, stateA_positions, comp_resids) = (
            self._create_stateA_system(
                small_mols=states_inputs["A"]["mols"],
                protein_component=protein_component,
                solvent_component=solvent_component,
                system_generator=states_inputs["A"]["generator"],
                solvation_settings=settings["solvation_settings"],
            )
        )

        # State B reuses the state A topology, with the residues of the
        # mapping's componentA passed as the ones to exclude
        (stateB_system, stateB_topology, stateB_alchem_resids) = self._create_stateB_system(
            small_mols=states_inputs["B"]["mols"],
            mapping=mapping,
            stateA_topology=stateA_topology,
            exclude_resids=comp_resids[mapping.componentA],
            system_generator=states_inputs["B"]["generator"],
        )

    # Get the mapping between the two systems
    system_mappings = _rfe_utils.topologyhelpers.get_system_mappings(
        old_to_new_atom_map=mapping.componentA_to_componentB,
        old_system=stateA_system,
        old_topology=stateA_topology,
        old_resids=comp_resids[mapping.componentA],
        new_system=stateB_system,
        new_topology=stateB_topology,
        new_resids=stateB_alchem_resids,
        # These are non-optional settings for this method
        fix_constraints=True,
    )

    # Net charge: add alchemical water if needed
    # Must be done here as we in-place modify the particles of state B.
    if settings["alchemical_settings"].explicit_charge_correction:
        self._handle_net_charge(
            stateA_topology=stateA_topology,
            stateA_positions=stateA_positions,
            stateB_topology=stateB_topology,
            stateB_system=stateB_system,
            charge_difference=mapping.get_alchemical_charge_difference(),
            system_mappings=system_mappings,
            distance_cutoff=settings["alchemical_settings"].explicit_charge_correction_cutoff,
            solvent_component=solvent_component,
        )

    # Finally get the state B positions
    # The positions to insert are the first conformer of the mapping's
    # componentB molecule.
    stateB_positions = _rfe_utils.topologyhelpers.set_and_check_new_positions(
        system_mappings,
        stateA_topology,
        stateB_topology,
        old_positions=ensure_quantity(stateA_positions, "openmm"),
        insert_positions=ensure_quantity(
            small_mols[mapping.componentB].conformers[0], "openmm"
        ),
    )

    return (
        stateA_system,
        stateA_topology,
        stateA_positions,
        stateB_system,
        stateB_topology,
        stateB_positions,
        system_mappings,
    )
@staticmethod
def _get_alchemical_system(
stateA_system: openmm.System,
stateA_positions: openmm.unit.Quantity,
stateA_topology: openmm.app.Topology,
stateB_system: openmm.System,
stateB_positions: openmm.unit.Quantity,
stateB_topology: openmm.app.Topology,
system_mappings: dict[str, dict[int, int]],
alchemical_settings: AlchemicalSettings,
):
"""
Get the hybrid topology alchemical system.
Parameters
----------
stateA_system : openmm.System
State A OpenMM System
stateA_positions : openmm.unit.Quantity
Positions of state A System
stateA_topology : openmm.app.Topology
Topology of state A System
stateB_system : openmm.System
State B OpenMM System
stateB_positions : openmm.unit.Quantity
Positions of state B System
stateB_topology : openmm.app.Topology
Topology of state B System
system_mappings : dict[str, dict[int, int]]
Mapping of corresponding atoms between the two Systems.
alchemical_settings : AlchemicalSettings
The alchemical settings defining how the alchemical system
will be built.
Returns
-------
hybrid_factory : HybridTopologyFactory
The factory creating the hybrid system.
hybrid_system : openmm.System
The hybrid System.
"""
if alchemical_settings.softcore_LJ.lower() == "gapsys":
softcore_LJ_v2 = True
elif alchemical_settings.softcore_LJ.lower() == "beutler":
softcore_LJ_v2 = False
hybrid_factory = _rfe_utils.relative.HybridTopologyFactory(
stateA_system,
stateA_positions,
stateA_topology,
stateB_system,
stateB_positions,
stateB_topology,
old_to_new_atom_map=system_mappings["old_to_new_atom_map"],
old_to_new_core_atom_map=system_mappings["old_to_new_core_atom_map"],
use_dispersion_correction=alchemical_settings.use_dispersion_correction,
softcore_alpha=alchemical_settings.softcore_alpha,
softcore_LJ_v2=softcore_LJ_v2,
softcore_LJ_v2_alpha=alchemical_settings.softcore_alpha,
interpolate_old_and_new_14s=alchemical_settings.turn_off_core_unique_exceptions,
)
return hybrid_factory, hybrid_factory.hybrid_system
def _subsample_topology(
self,
hybrid_topology: mdt.Topology,
hybrid_positions: openmm.unit.Quantity,
output_selection: str,
output_filename: str,
atom_classes: dict[str, set[int]],
) -> npt.NDArray:
"""
Subsample the hybrid topology based on user-selected output selection
and write the subsampled topology to a PDB file.
Parameters
----------
hybrid_topology : mdtraj.Topology
The hybrid system topology to subsample.
hybrid_positions : openmm.unit.Quantity
The hybrid system positions.
output_selection : str
An MDTraj selection string to subsample the topology with.
output_filename : str
The name of the file to write the PDB to.
atom_classes : dict[str, set[int]]
A dictionary defining what atoms belong to the different
components of the hybrid system.
Returns
-------
selection_indices : npt.NDArray
The indices of the subselected system.
TODO
----
* Modify this to also store the full system.
* Use the mdtraj_from_openmm utility.
"""
selection_indices = hybrid_topology.select(output_selection)
# Write out a PDB containing the subsampled hybrid state
# We use bfactors as a hack to label different states
# bfactor of 0 is environment atoms
# bfactor of 0.25 is unique old atoms
# bfactor of 0.5 is core atoms
# bfactor of 0.75 is unique new atoms
bfactors = np.zeros_like(selection_indices, dtype=float)
bfactors[np.isin(selection_indices, list(atom_classes["unique_old_atoms"]))] = 0.25
bfactors[np.isin(selection_indices, list(atom_classes["core_atoms"]))] = 0.50
bfactors[np.isin(selection_indices, list(atom_classes["unique_new_atoms"]))] = 0.75
if len(selection_indices) > 0:
traj = mdt.Trajectory(
hybrid_positions[selection_indices, :],
hybrid_topology.subset(selection_indices),
).save_pdb(
self.shared_basepath / output_filename,
bfactors=bfactors,
)
return selection_indices
def run(
    self,
    *,
    dry: bool = False,
    verbose: bool = True,
    scratch_basepath: pathlib.Path | None = None,
    shared_basepath: pathlib.Path | None = None,
) -> dict[str, Any]:
    """Set up a hybrid topology system and serialize it.

    The hybrid System is written to ``hybrid_system.xml.bz2`` and the
    hybrid positions (in nanometers) to ``hybrid_positions.npy``, both
    under the shared base path.

    Parameters
    ----------
    dry : bool
      If ``True``, the unserialized hybrid factory, system and positions
      are additionally included in the returned dictionary.
    verbose : bool
      Verbose output of the simulation progress. Output is provided via
      INFO level logging.
    scratch_basepath : pathlib.Path | None
      Where to store temporary files, defaults to current working directory
    shared_basepath : pathlib.Path | None
      Where to run the calculation, defaults to current working directory

    Returns
    -------
    dict
      Paths to the serialized system, positions and PDB structure, plus
      the selection indices. With ``dry==True`` the debug objects
      (e.g. HybridTopologyFactory) are also included.

    Raises
    ------
    error
      Exception if anything failed
    """
    # Paths & verbosity
    self._prepare(verbose, scratch_basepath, shared_basepath)
    if self.verbose:
        self.logger.info("Starting system setup unit")

    # Settings of the owning Protocol
    settings = self._get_settings(self._inputs["protocol"].settings)
    output_settings = settings["output_settings"]

    # Inputs of this unit
    state_a = self._inputs["stateA"]
    state_b = self._inputs["stateB"]
    ligand_mapping = self._inputs["ligandmapping"]
    # Looked up as in the original implementation; currently not used further
    alchem_comps = self._inputs["alchemical_components"]

    solvent_comp, protein_comp, small_mols = self._get_components(state_a, state_b)

    # Assign partial charges now to avoid any discrepancies later
    self._assign_partial_charges(settings["charge_settings"], small_mols)

    # End state OpenMM objects
    (
        system_a,
        topology_a,
        positions_a,
        system_b,
        topology_b,
        positions_b,
        system_mappings,
    ) = self._get_omm_objects(
        stateA=state_a,
        stateB=state_b,
        mapping=ligand_mapping,
        settings=settings,
        protein_component=protein_comp,
        solvent_component=solvent_comp,
        small_mols=small_mols,
    )

    # Hybrid factory & system
    hybrid_factory, hybrid_system = self._get_alchemical_system(
        stateA_system=system_a,
        stateA_positions=positions_a,
        stateA_topology=topology_a,
        stateB_system=system_b,
        stateB_positions=positions_b,
        stateB_topology=topology_b,
        system_mappings=system_mappings,
        alchemical_settings=settings["alchemical_settings"],
    )

    # Subselect system based on user inputs & write initial PDB
    selection_indices = self._subsample_topology(
        hybrid_topology=hybrid_factory.hybrid_topology,
        hybrid_positions=hybrid_factory.hybrid_positions,
        output_selection=output_settings.output_indices,
        output_filename=output_settings.output_structure,
        atom_classes=hybrid_factory._atom_classes,
    )

    # Serialize the OpenMM System
    system_path = self.shared_basepath / "hybrid_system.xml.bz2"
    serialize(hybrid_system, system_path)

    # Serialize the positions as a unitless array in nanometers
    positions_path = self.shared_basepath / "hybrid_positions.npy"
    positions_nm = from_openmm(hybrid_factory.hybrid_positions).to("nanometer").m
    np.save(positions_path, positions_nm)

    results = {
        "system": system_path,
        "positions": positions_path,
        "pdb_structure": self.shared_basepath / output_settings.output_structure,
        "selection_indices": selection_indices,
    }

    if dry:
        # Adding unserialized objects so we can directly use them
        # to chain units in tests
        results.update(
            hybrid_factory=hybrid_factory,
            hybrid_system=hybrid_system,
            hybrid_positions=hybrid_factory.hybrid_positions,
        )

    return results
def _execute(
    self,
    ctx: gufe.Context,
    **inputs,
) -> dict[str, Any]:
    """
    Run the setup unit within the given execution context.

    Parameters
    ----------
    ctx : gufe.Context
      The execution context; its ``scratch`` and ``shared`` paths are
      passed to ``run`` as base paths.
    **inputs
      Accepted but not used here; inputs are read from ``self._inputs``.

    Returns
    -------
    dict[str, Any]
      The repeat id, generation and the current openmm, openfe and gufe
      versions, followed by the outputs of ``run``.
    """
    log_system_probe(logging.INFO, paths=[ctx.scratch])

    run_outputs = self.run(scratch_basepath=ctx.scratch, shared_basepath=ctx.shared)

    # Provenance entries come first, the run outputs are merged after them
    unit_outputs = {
        "repeat_id": self._inputs["repeat_id"],
        "generation": self._inputs["generation"],
        "openmm_version": openmm.__version__,
        "openfe_version": openfe.__version__,
        "gufe_version": gufe.__version__,
    }
    unit_outputs.update(run_outputs)
    return unit_outputs
class HybridTopologyMultiStateSimulationUnit(gufe.ProtocolUnit, HybridTopologyUnitMixin):
"""
Multi-state simulation (e.g. multi replica methods like hamiltonian
replica exchange) unit for Hybrid Topology Protocol transformations.
"""
@staticmethod
def _check_restart(output_settings: SettingsBaseModel, shared_path: pathlib.Path):
"""
Check if we are doing a restart.
Parameters
----------
output_settings : SettingsBaseModel
The simulation output settings
shared_path : pathlib.Path
The shared directory where we should be looking for existing files.
Notes
-----
For now this just checks if the netcdf files are present in the
shared directory but in the future this may expand depending on
how warehouse works.
Raises
------
IOError
If either the checkpoint or trajectory files don't exist.
"""
trajectory = shared_path / output_settings.output_filename
checkpoint = shared_path / output_settings.checkpoint_storage_filename
if trajectory.is_file() and checkpoint.is_file():
return True
elif trajectory.is_file() ^ checkpoint.is_file():
if trajectory.is_file():
errmsg = "the trajectory file is present but not the checkpoint file. "
else:
errmsg = "the checkpoint file is present but not the trajectory file. "
errmsg = (
"Attempting to restart but "
+ errmsg
+ "This should not happen under normal circumstances."
)
raise IOError(errmsg)
else:
return False
@staticmethod
def _get_integrator(
    integrator_settings: IntegratorSettings,
    simulation_settings: MultiStateSimulationSettings,
    system: openmm.System,
) -> openmmtools.mcmc.LangevinDynamicsMove:
    """
    Create the Langevin dynamics move and validate it against the System.

    Parameters
    ----------
    integrator_settings : IntegratorSettings
      Settings controlling the Langevin integrator.
    simulation_settings : MultiStateSimulationSettings
      Settings controlling the simulation.
    system : openmm.System
      The OpenMM System.

    Returns
    -------
    integrator : openmmtools.mcmc.LangevinDynamicsMove
      The LangevinDynamicsMove integrator.

    Raises
    ------
    ValueError
      If there are virtual sites in the system, but velocities
      are not being reassigned after every MCMC move.
    """
    n_steps = settings_validation.convert_steps_per_iteration(
        simulation_settings, integrator_settings
    )

    move = openmmtools.mcmc.LangevinDynamicsMove(
        timestep=to_openmm(integrator_settings.timestep),
        collision_rate=to_openmm(integrator_settings.langevin_collision_rate),
        n_steps=n_steps,
        reassign_velocities=integrator_settings.reassign_velocities,
        n_restart_attempts=integrator_settings.n_restart_attempts,
        constraint_tolerance=integrator_settings.constraint_tolerance,
    )

    # Nothing more to validate when velocities are reassigned
    if integrator_settings.reassign_velocities:
        return move

    # Validate for known issue when dealing with virtual sites
    # and multistate simulations
    has_virtual_site = any(
        system.isVirtualSite(index) for index in range(system.getNumParticles())
    )
    if has_virtual_site:
        errmsg = (
            "Simulations with virtual sites without velocity "
            "reassignments are unstable with MCMC integrators."
        )
        raise ValueError(errmsg)

    return move
@staticmethod
def _get_reporter(
    storage_path: pathlib.Path,
    selection_indices: npt.NDArray,
    output_settings: MultiStateOutputSettings,
    simulation_settings: MultiStateSimulationSettings,
) -> multistate.MultiStateReporter:
    """
    Create the multistate reporter.

    Parameters
    ----------
    storage_path : pathlib.Path
      Path to the directory where files should be written.
    selection_indices : npt.NDArray
      The set of system indices to report positions & velocities for.
    output_settings : MultiStateOutputSettings
      Settings defining how outputs should be written.
    simulation_settings : MultiStateSimulationSettings
      Settings defining how the simulation should be run.

    Returns
    -------
    multistate.MultiStateReporter
      The reporter. A position or velocity interval of 0 is used when
      the corresponding write frequency is ``None``.

    Notes
    -----
    All this does is create the reporter, it works for both
    new reporters and if we are doing a restart.
    """

    def _write_interval(frequency, numerator_name, denominator_name):
        # A write frequency of None maps onto an interval of 0
        if frequency is None:
            return 0
        return settings_validation.divmod_time_and_check(
            numerator=frequency,
            denominator=simulation_settings.time_per_iteration,
            numerator_name=numerator_name,
            denominator_name=denominator_name,
        )

    # The trajectory file lives in the storage directory
    trajectory_file = storage_path / output_settings.output_filename
    # The checkpoint file in openmmtools is taken as a file relative
    # to the location of the nc file, so you only want the filename
    checkpoint_file = output_settings.checkpoint_storage_filename

    position_interval = _write_interval(
        output_settings.positions_write_frequency,
        "output settings' position_write_frequency",
        "simulation settings' time_per_iteration",
    )
    velocity_interval = _write_interval(
        output_settings.velocities_write_frequency,
        "output settings' velocity_write_frequency",
        "sampler settings' time_per_iteration",
    )

    checkpoint_iterations = settings_validation.convert_checkpoint_interval_to_iterations(
        checkpoint_interval=output_settings.checkpoint_interval,
        time_per_iteration=simulation_settings.time_per_iteration,
    )

    return multistate.MultiStateReporter(
        storage=trajectory_file,
        analysis_particle_indices=selection_indices,
        checkpoint_interval=checkpoint_iterations,
        checkpoint_storage=checkpoint_file,
        position_interval=position_interval,
        velocity_interval=velocity_interval,
    )
@staticmethod
def _get_sampler(
    system: openmm.System,
    positions: openmm.unit.Quantity,
    lambdas: _rfe_utils.lambdaprotocol.LambdaProtocol,
    integrator: openmmtools.mcmc.MCMCMove,
    reporter: multistate.MultiStateReporter,
    simulation_settings: MultiStateSimulationSettings,
    thermo_settings: ThermoSettings,
    alchem_settings: AlchemicalSettings,
    platform: openmm.Platform,
    restart: bool,
    dry: bool,
) -> multistate.MultiStateSampler:
    """
    Get the MultiStateSampler.

    A new sampler is built and set up from the inputs, unless ``restart``
    is ``True``, in which case the sampler is rebuilt from the reporter's
    storage and checked against the current Protocol settings.

    Parameters
    ----------
    system : openmm.System
      The OpenMM System to simulate.
    positions : openmm.unit.Quantity
      The positions of the OpenMM System.
    lambdas : LambdaProtocol
      The lambda protocol to sample along.
    integrator : openmmtools.mcmc.MCMCMove
      The integrator to use.
    reporter : multistate.MultiStateReporter
      The reporter to attach to the sampler.
    simulation_settings : MultiStateSimulationSettings
      The simulation control settings.
    thermo_settings : ThermoSettings
      The thermodynamic control settings.
    alchem_settings : AlchemicalSettings
      The alchemical transformation settings.
    platform : openmm.Platform
      The compute platform to use.
    restart : bool
      ``True`` if we are doing a simulation restart.
    dry : bool
      Whether or not this is a dry run.

    Returns
    -------
    sampler : multistate.MultiStateSampler
      The requested sampler.

    Raises
    ------
    AttributeError
      If the requested sampler method is not one of ``repex``, ``sams``
      or ``independent``.
    AssertionError
      On restart, if the stored temperature, pressure or lambda
      parameters of any state differ from the Protocol settings.
    ValueError
      On restart, if the stored replica / state counts or MCMC move
      parameters differ from the Protocol settings.
    """
    _SAMPLERS = {
        "repex": _rfe_utils.multistate.HybridRepexSampler,
        "sams": _rfe_utils.multistate.HybridSAMSSampler,
        "independent": _rfe_utils.multistate.HybridMultiStateSampler,
    }

    # note we look the sampler method up in a table because in the future
    # we will try to reuse this method and just have _SAMPLERs be
    # defined elsewhere
    sampler_method = simulation_settings.sampler_method.lower()
    try:
        sampler_class = _SAMPLERS[sampler_method]
    except KeyError:
        errmsg = f"Unknown sampler {sampler_method}"
        # The KeyError adds nothing beyond the message, so drop the context
        raise AttributeError(errmsg) from None

    # Get the real time analysis values to use
    rta_its, rta_min_its = settings_validation.convert_real_time_analysis_iterations(
        simulation_settings=simulation_settings,
    )

    # Get the number of production iterations to run for
    steps_per_iteration = integrator.n_steps
    timestep = from_openmm(integrator.timestep)
    number_of_iterations = int(
        settings_validation.get_simsteps(
            sim_length=simulation_settings.production_length,
            timestep=timestep,
            mc_steps=steps_per_iteration,
        )
        / steps_per_iteration
    )

    # convert early_termination_target_error from kcal/mol to kT
    early_termination_target_error = (
        settings_validation.convert_target_error_from_kcal_per_mole_to_kT(
            thermo_settings.temperature,
            simulation_settings.early_termination_target_error,
        )
    )

    sampler_kwargs = {
        "mcmc_moves": integrator,
        "hybrid_system": system,
        "hybrid_positions": positions,
        "online_analysis_interval": rta_its,
        "online_analysis_target_error": early_termination_target_error,
        "online_analysis_minimum_iterations": rta_min_its,
        "number_of_iterations": number_of_iterations,
    }

    if sampler_method == "sams":
        sampler_kwargs |= {
            "flatness_criteria": simulation_settings.sams_flatness_criteria,
            "gamma0": simulation_settings.sams_gamma0,
        }

    if sampler_method == "repex":
        sampler_kwargs |= {"replica_mixing_scheme": "swap-all"}

    # Restarting doesn't need any setup, we just rebuild from storage.
    if restart:
        sampler = sampler_class.from_storage(reporter)  # type: ignore[attr-defined]

        # We do some checks to make sure we are running the same system
        system_validation.assert_multistate_system_equality(
            ref_system=system,
            stored_system=sampler._thermodynamic_states[0].get_system(remove_thermostat=True),
        )

        # We check that we have the same thermodynamic parameters and
        # that the lambda schedule is the same.
        # These are explicit raises rather than ``assert`` statements so
        # the checks still run under ``python -O``; AssertionError is
        # kept so existing handlers see the same exception type.
        for index, thermostate in enumerate(sampler._thermodynamic_states):
            if not (thermostate.temperature == to_openmm(thermo_settings.temperature)):
                errmsg = (
                    f"Temperature of stored state {index} does not match "
                    "Protocol settings, cannot resume."
                )
                raise AssertionError(errmsg)

            if not (thermostate.pressure == to_openmm(thermo_settings.pressure)):
                errmsg = (
                    f"Pressure of stored state {index} does not match "
                    "Protocol settings, cannot resume."
                )
                raise AssertionError(errmsg)

            # The lambda value only depends on the state index, so it is
            # looked up once per state rather than once per function
            lambda_value = lambdas.lambda_schedule[index]
            for key in lambdas.functions:
                expected = lambdas.functions[key](lambda_value)
                stored = getattr(thermostate, key)
                if not (expected == stored):
                    errmsg = (
                        f"Lambda parameter {key} of stored state {index} "
                        f"is {stored}, expected {expected}; cannot resume."
                    )
                    raise AssertionError(errmsg)

        # Finally we check that some of the sampler parameters haven't changed
        if (
            (simulation_settings.n_replicas != sampler.n_states)
            or (simulation_settings.n_replicas != sampler.n_replicas)
            or (sampler.mcmc_moves[0].n_steps != steps_per_iteration)
            or (sampler.mcmc_moves[0].timestep != integrator.timestep)
        ):
            errmsg = "Sampler in checkpoint does not match Protocol settings, cannot resume."
            raise ValueError(errmsg)
    else:
        sampler = sampler_class(**sampler_kwargs)

        sampler.setup(
            n_replicas=simulation_settings.n_replicas,
            reporter=reporter,
            lambda_protocol=lambdas,
            temperature=to_openmm(thermo_settings.temperature),
            endstates=alchem_settings.endstate_dispersion_correction,
            minimization_platform=platform.getName(),
            # Set minimization steps to None when running in dry mode
            # otherwise do a very small one to avoid NaNs
            minimization_steps=100 if not dry else None,
        )

    # Get and set the context caches
    sampler.energy_context_cache = openmmtools.cache.ContextCache(
        capacity=None,
        time_to_live=None,
        platform=platform,
    )
    sampler.sampler_context_cache = openmmtools.cache.ContextCache(
        capacity=None,
        time_to_live=None,
        platform=platform,
    )

    return sampler
def _run_simulation(
    self,
    sampler: multistate.MultiStateSampler,
    reporter: multistate.MultiStateReporter,
    simulation_settings: MultiStateSimulationSettings,
    integrator_settings: IntegratorSettings,
    output_settings: MultiStateOutputSettings,
    dry: bool,
):
    """
    Drive the sampler through minimization, equilibration and production.

    In a dry run nothing is simulated; the reporter is closed and the
    files it created are removed instead.

    Parameters
    ----------
    sampler : multistate.MultiStateSampler.
      The sampler associated with the simulation to run.
    reporter : multistate.MultiStateReporter
      The reporter associated with the sampler.
    simulation_settings : MultiStateSimulationSettings
      Simulation control settings.
    integrator_settings : IntegratorSettings
      Integrator control settings.
    output_settings : MultiStateOutputSettings
      Simulation output control settings.
    dry : bool
      Whether or not to dry run the simulation.
    """

    def _announce(message: str) -> None:
        # Progress messages are only emitted in verbose mode
        if self.verbose:
            self.logger.info(message)

    # Work out how many integration steps each phase needs
    steps_per_move = settings_validation.convert_steps_per_iteration(
        simulation_settings=simulation_settings,
        integrator_settings=integrator_settings,
    )
    n_equil_steps = settings_validation.get_simsteps(
        sim_length=simulation_settings.equilibration_length,
        timestep=integrator_settings.timestep,
        mc_steps=steps_per_move,
    )
    n_prod_steps = settings_validation.get_simsteps(
        sim_length=simulation_settings.production_length,
        timestep=integrator_settings.timestep,
        mc_steps=steps_per_move,
    )

    if not dry:  # pragma: no-cover
        # A sampler at iteration zero has done no production yet, so it
        # still needs minimizing and equilibrating
        if sampler._iteration == 0:
            _announce("minimizing systems")
            sampler.minimize(max_iterations=simulation_settings.minimization_steps)

            _announce("equilibrating systems")
            sampler.equilibrate(int(n_equil_steps / steps_per_move))

        _announce("running production phase")

        # Only request the iterations that have not been run yet.
        # TODO: passing n_iterations is extra prudence, the sampler was
        # already built with an iteration limit - remove?
        remaining_iterations = int(n_prod_steps / steps_per_move) - sampler._iteration
        sampler.run(n_iterations=remaining_iterations)

        _announce("production phase complete")
    else:
        # Dry run: close the reporter first to prevent file handle clashes
        reporter.close()

        # TODO: review this is likely no longer necessary
        # remove the files the reporter left behind
        stale_files = [
            self.shared_basepath / output_settings.output_filename,
            self.shared_basepath / output_settings.checkpoint_storage_filename,
        ]
        for stale_file in stale_files:
            os.remove(stale_file)
def run(
self,
*,
system: openmm.System,
positions: openmm.unit.Quantity,
selection_indices: npt.NDArray,
dry: bool = False,
verbose: bool = True,
scratch_basepath: pathlib.Path | None = None,
shared_basepath: pathlib.Path | None = None,
) -> dict[str, Any]:
"""Run the free energy calculation using a multistate sampler.
Builds the lambda schedule, compute platform, integrator, reporter and
sampler from the Protocol settings, runs the simulation, and then
releases the reporter and the OpenMM context caches.
Parameters
----------
system : openmm.System
The System to simulate.
positions : openmm.unit.Quantity
The positions of the System.
selection_indices : npt.NDArray
Indices of the System particles to write to file.
dry : bool
Do a dry run of the calculation, creating all necessary hybrid
system components (topology, system, sampler, etc...) but without
running the simulation.
verbose : bool
Verbose output of the simulation progress. Output is provided via
INFO level logging.
scratch_basepath: pathlib.Path | None
Where to store temporary files, defaults to current working directory
shared_basepath : pathlib.Path | None
Where to run the calculation, defaults to current working directory
Returns
-------
dict
For a real run, the paths of the trajectory (``"nc"``) and
checkpoint (``"checkpoint"``) files under the shared basepath.
For a dry run (``dry==True``), the debug objects ``"sampler"`` and
``"integrator"``.
Raises
------
Exception
Nothing is caught here other than the ``UnboundLocalError`` guard
around the cleanup, so failures while building the reporter or
sampler, or while running the simulation, propagate to the caller
after the cleanup has been attempted.
"""
# Prepare paths & verbosity
self._prepare(verbose, scratch_basepath, shared_basepath)
if self.verbose:
self.logger.info("Starting simulation unit")
# Get the settings
settings = self._get_settings(self._inputs["protocol"].settings)
# Check for a restart
self.restart = self._check_restart(
output_settings=settings["output_settings"], shared_path=self.shared_basepath
)
# Get the lambda schedule
# TODO - this should be better exposed to users
lambdas = _rfe_utils.lambdaprotocol.LambdaProtocol(
functions=settings["lambda_settings"].lambda_functions,
windows=settings["lambda_settings"].lambda_windows,
)
# Get the compute platform
# The CPU count restriction is requested only for "nocutoff" systems
restrict_cpu = settings["forcefield_settings"].nonbonded_method.lower() == "nocutoff"
platform = omm_compute.get_openmm_platform(
platform_name=settings["engine_settings"].compute_platform,
gpu_device_index=settings["engine_settings"].gpu_device_index,
restrict_cpu_count=restrict_cpu,
)
# Get the integrator
integrator = self._get_integrator(
integrator_settings=settings["integrator_settings"],
simulation_settings=settings["simulation_settings"],
system=system,
)
# Everything that creates a reporter or sampler lives inside the try
# block so that the cleanup below runs however far we got.
try:
# Get the reporter
reporter = self._get_reporter(
storage_path=self.shared_basepath,
selection_indices=selection_indices,
output_settings=settings["output_settings"],
simulation_settings=settings["simulation_settings"],
)
# Get the sampler
sampler = self._get_sampler(
system=system,
positions=positions,
lambdas=lambdas,
integrator=integrator,
reporter=reporter,
simulation_settings=settings["simulation_settings"],
thermo_settings=settings["thermo_settings"],
alchem_settings=settings["alchemical_settings"],
platform=platform,
restart=self.restart,
dry=dry,
)
# Run the simulation
self._run_simulation(
sampler=sampler,
reporter=reporter,
simulation_settings=settings["simulation_settings"],
integrator_settings=settings["integrator_settings"],
output_settings=settings["output_settings"],
dry=dry,
)
finally:
# Have to wrap this in a try/except, because we might
# be in a situation where the reporter or sampler wasn't created.
# In that case the first use of the missing name raises
# UnboundLocalError and the remaining cleanup steps are skipped.
try:
# Order is reporter, contexts, sampler, integrator
reporter.close() # close to prevent file handle clashes
# clear GPU context
# Note: use cache.empty() when openmmtools #690 is resolved
# The keys are copied into a list first because entries are deleted
# from the underlying mapping while looping.
for context in list(sampler.energy_context_cache._lru._data.keys()):
del sampler.energy_context_cache._lru._data[context]
for context in list(sampler.sampler_context_cache._lru._data.keys()):
del sampler.sampler_context_cache._lru._data[context]
# cautiously clear out the global context cache too
for context in list(openmmtools.cache.global_context_cache._lru._data.keys()):
del openmmtools.cache.global_context_cache._lru._data[context]
# Note this also happens in a dry run, so the sampler returned below
# no longer carries these two context cache attributes.
del sampler.sampler_context_cache, sampler.energy_context_cache
# Keep these around in a dry run so we can inspect things
if not dry:
# At this point we know the sampler exists, so we del the integrator
# first since it's associated with the sampler
del integrator, sampler
except UnboundLocalError:
pass
if not dry: # pragma: no-cover
# Real run: report where the trajectory and checkpoint files live
return {
"nc": self.shared_basepath / settings["output_settings"].output_filename,
"checkpoint": self.shared_basepath
/ settings["output_settings"].checkpoint_storage_filename,
}
else:
# Dry run: hand back the objects that were kept alive for inspection
return {
"sampler": sampler,
"integrator": integrator,
}
def _execute(
    self,
    ctx: gufe.Context,
    *,
    setup_results,
    **inputs,
) -> dict[str, Any]:
    """
    Execute the simulation unit from the results of the setup unit.

    Parameters
    ----------
    ctx : gufe.Context
      Execution context providing the scratch and shared paths.
    setup_results
      Result of the setup unit; its ``outputs`` supply the serialized
      system, the positions file and the selection indices.
    **inputs
      Any further unit inputs; they are not used here.

    Returns
    -------
    dict[str, Any]
      The ``repeat_id`` and ``generation`` of this unit, merged with
      everything returned by ``run``.
    """
    log_system_probe(logging.INFO, paths=[ctx.scratch])

    setup_outputs = setup_results.outputs

    # Refuse to continue if the software environment differs from setup
    self._verify_execution_environment(setup_outputs)

    # Rebuild the simulation inputs stored by the setup unit
    omm_system = deserialize(setup_outputs["system"])
    omm_positions = to_openmm(np.load(setup_outputs["positions"]) * offunit.nm)
    atom_selection = setup_outputs["selection_indices"]

    run_outputs = self.run(
        system=omm_system,
        positions=omm_positions,
        selection_indices=atom_selection,
        scratch_basepath=ctx.scratch,
        shared_basepath=ctx.shared,
    )

    # Identifiers come first, then whatever the run produced
    identifiers = {
        "repeat_id": self._inputs["repeat_id"],
        "generation": self._inputs["generation"],
    }
    return identifiers | run_outputs
class HybridTopologyMultiStateAnalysisUnit(gufe.ProtocolUnit, HybridTopologyUnitMixin):
    """
    Analysis unit for multi-state Hybrid Topology Protocol transformations.

    Runs the energy analysis followed by the structural analysis on the
    outputs of the setup and simulation units.
    """

    @staticmethod
    def _analyze_multistate_energies(
        trajectory: pathlib.Path,
        checkpoint: pathlib.Path,
        sampler_method: str,
        output_directory: pathlib.Path,
        dry: bool,
    ):
        """
        Analyze multistate energies and generate plots.

        Parameters
        ----------
        trajectory : pathlib.Path
          Path to the NetCDF trajectory file.
        checkpoint : pathlib.Path
          The name of the checkpoint file. Note this is
          relative in path to the trajectory file.
        sampler_method : str
          The multistate sampler method used.
        output_directory : pathlib.Path
          The path to where plots will be written.
        dry : bool
          Whether or not we are running a dry run.

        Returns
        -------
        The analyzer's ``unit_results_dict``.
        """
        reporter = multistate.MultiStateReporter(
            storage=trajectory,
            # Note: openmmtools only wants the name of the checkpoint
            # file, it assumes it to be in the same place as the trajectory
            checkpoint_storage=checkpoint.name,
            open_mode="r",
        )

        # The reporter and analyzer are closed even when the analysis or
        # plotting fails, so a failure does not leave file handles open.
        try:
            analyzer = multistate_analysis.MultistateEquilFEAnalysis(
                reporter=reporter,
                sampling_method=sampler_method,
                result_units=offunit.kilocalorie_per_mole,
            )
            try:
                # Only create plots when not doing a dry run
                if not dry:
                    analyzer.plot(filepath=output_directory, filename_prefix="")
            finally:
                analyzer.close()
        finally:
            reporter.close()

        return analyzer.unit_results_dict

    @staticmethod
    def _structural_analysis(
        pdb_file: pathlib.Path,
        trj_file: pathlib.Path,
        output_directory: pathlib.Path,
        dry: bool,
    ) -> dict[str, str | pathlib.Path]:
        """
        Run structural analysis using ``openfe-analysis``.

        Parameters
        ----------
        pdb_file : pathlib.Path
          Path to the PDB file.
        trj_file : pathlib.Path
          Path to the trajectory file.
        output_directory : pathlib.Path
          The output directory where plots and the data NPZ file
          will be stored.
        dry : bool
          Whether or not we are running a dry run.

        Returns
        -------
        dict[str, str | pathlib.Path]
          Dictionary containing either the path to the NPZ
          file with the structural data, or the analysis error.

        Notes
        -----
        Don't put energy analysis here as it uses the MultiStateReporter,
        the structural analysis requires the file handle to be closed.
        """
        from openfe_analysis import rmsd

        # A failure to gather the data is reported in the outputs rather
        # than raised, so it does not fail the whole unit.
        try:
            data = rmsd.gather_rms_data(pdb_file, trj_file)
        # TODO: eventually change this to more specific exception types
        except Exception as e:
            return {"structural_analysis_error": str(e)}

        # Generate relevant plots if not a dry run
        if not dry:
            # The protein plot is skipped when there is no protein data
            if d := data["protein_2D_RMSD"]:
                fig = plotting.plot_2D_rmsd(d)
                fig.savefig(output_directory / "protein_2D_RMSD.png")
                plt.close(fig)

            f2 = plotting.plot_ligand_COM_drift(data["time(ps)"], data["ligand_wander"])
            f2.savefig(output_directory / "ligand_COM_drift.png")
            plt.close(f2)

            f3 = plotting.plot_ligand_RMSD(data["time(ps)"], data["ligand_RMSD"])
            f3.savefig(output_directory / "ligand_RMSD.png")
            plt.close(f3)

        # Write out an NPZ with all the relevant analysis data.
        # This is written for dry runs too.
        npz_file = output_directory / "structural_analysis.npz"
        np.savez_compressed(
            npz_file,
            protein_RMSD=np.asarray(data["protein_RMSD"], dtype=np.float32),
            ligand_RMSD=np.asarray(data["ligand_RMSD"], dtype=np.float32),
            ligand_COM_drift=np.asarray(data["ligand_wander"], dtype=np.float32),
            protein_2D_RMSD=np.asarray(data["protein_2D_RMSD"], dtype=np.float32),
            time_ps=np.asarray(data["time(ps)"], dtype=np.float32),
        )

        return {"structural_analysis": npz_file}

    def run(
        self,
        *,
        pdb_file: pathlib.Path,
        trajectory: pathlib.Path,
        checkpoint: pathlib.Path,
        dry: bool = False,
        verbose: bool = True,
        scratch_basepath: pathlib.Path | None = None,
        shared_basepath: pathlib.Path | None = None,
    ) -> dict[str, Any]:
        """Analyze the multistate simulation.

        Parameters
        ----------
        pdb_file : pathlib.Path
          Path to the PDB file representing the subsampled structure.
        trajectory : pathlib.Path
          Path to the MultiStateReporter generated NetCDF file.
        checkpoint : pathlib.Path
          Path to the checkpoint file generated by MultiStateReporter.
        dry : bool
          Do a dry run of the analysis: the analyses are still carried
          out, but no plots are generated.
        verbose : bool
          Verbose output of the analysis progress. Output is provided via
          INFO level logging.
        scratch_basepath: pathlib.Path | None
          Where to store temporary files, defaults to current working directory
        shared_basepath : pathlib.Path | None
          Where to write the analysis outputs, defaults to current working
          directory

        Returns
        -------
        dict
          The energy analysis results merged with the structural analysis
          outputs; on a key clash the structural analysis entry wins.

        Raises
        ------
        Exception
          Errors from the energy analysis or from writing the structural
          outputs are not caught here. Only a failure to gather the
          structural data is reported in the outputs instead.
        """
        # Prepare paths & verbosity
        self._prepare(verbose, scratch_basepath, shared_basepath)

        if self.verbose:
            self.logger.info("Starting simulation analysis unit")

        # Get the settings
        settings = self._get_settings(self._inputs["protocol"].settings)

        # Energies analysis
        if self.verbose:
            self.logger.info("Analyzing energies")

        energy_analysis = self._analyze_multistate_energies(
            trajectory=trajectory,
            checkpoint=checkpoint,
            sampler_method=settings["simulation_settings"].sampler_method.lower(),
            output_directory=self.shared_basepath,
            dry=dry,
        )

        # Structural analysis
        if self.verbose:
            self.logger.info("Analyzing structural outputs")

        structural_analysis = self._structural_analysis(
            pdb_file=pdb_file,
            trj_file=trajectory,
            output_directory=self.shared_basepath,
            dry=dry,
        )

        # Return relevant things
        outputs = energy_analysis | structural_analysis
        return outputs

    def _execute(
        self,
        ctx: gufe.Context,
        *,
        setup_results,
        simulation_results,
        **inputs,
    ) -> dict[str, Any]:
        """
        Execute the analysis unit from the setup and simulation results.

        Parameters
        ----------
        ctx : gufe.Context
          Execution context providing the scratch and shared paths.
        setup_results
          Result of the setup unit; supplies the PDB structure and the
          selection indices.
        simulation_results
          Result of the simulation unit; supplies the trajectory and
          checkpoint files.
        **inputs
          Any further unit inputs; they are not used here.

        Returns
        -------
        dict[str, Any]
          The unit identifiers, the analysed input files and the
          analysis outputs.
        """
        log_system_probe(logging.INFO, paths=[ctx.scratch])

        # Ensure that the environment hasn't changed
        self._verify_execution_environment(setup_results.outputs)

        pdb_file = setup_results.outputs["pdb_structure"]
        selection_indices = setup_results.outputs["selection_indices"]
        trajectory = simulation_results.outputs["nc"]
        checkpoint = simulation_results.outputs["checkpoint"]

        outputs = self.run(
            pdb_file=pdb_file,
            trajectory=trajectory,
            checkpoint=checkpoint,
            scratch_basepath=ctx.scratch,
            shared_basepath=ctx.shared,
        )

        return {
            "repeat_id": self._inputs["repeat_id"],
            "generation": self._inputs["generation"],
            # We include various other outputs here to make
            # things easier when gathering.
            "pdb_structure": pdb_file,
            "trajectory": trajectory,
            "checkpoint": checkpoint,
            "selection_indices": selection_indices,
            **outputs,
        }
================================================
FILE: src/openfe/protocols/openmm_septop/__init__.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""
Run SepTop free energy calculations using OpenMM and OpenMMTools.
"""
from .equil_septop_method import (
SepTopComplexAnalysisUnit,
SepTopComplexRunUnit,
SepTopComplexSetupUnit,
SepTopProtocol,
SepTopProtocolResult,
SepTopSolventAnalysisUnit,
SepTopSolventRunUnit,
SepTopSolventSetupUnit,
)
from .equil_septop_settings import (
SepTopSettings,
)
# Public API of the SepTop protocol package. Each entry must be spelled
# exactly like a name imported above, otherwise a star-import of this
# package raises AttributeError.
__all__ = [
    "SepTopProtocol",
    "SepTopSettings",
    "SepTopProtocolResult",
    "SepTopComplexSetupUnit",
    "SepTopSolventSetupUnit",
    "SepTopSolventRunUnit",
    "SepTopComplexRunUnit",
    "SepTopSolventAnalysisUnit",
    "SepTopComplexAnalysisUnit",
]
================================================
FILE: src/openfe/protocols/openmm_septop/base_units.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""OpenMM Equilibrium SepTop Protocol base classes
==================================================
Base classes for the equilibrium OpenMM SepTop free energy ProtocolUnits.
This mostly implements BaseSepTopUnit whose methods can be
overridden to define different types of alchemical transformations.
TODO
----
* Add in all the AlchemicalFactory and AlchemicalRegion kwargs
as settings.
"""
import abc
import logging
import pathlib
from typing import Any, Literal, Optional
import gufe
import numpy.typing as npt
import openmm
import openmmtools
from gufe import (
ChemicalSystem,
ProteinComponent,
SmallMoleculeComponent,
SolventComponent,
)
from gufe.components import Component
from gufe.protocols.errors import ProtocolUnitExecutionError
from openff.toolkit.topology import Molecule as OFFMolecule
from openff.units import unit as offunit
from openff.units.openmm import ensure_quantity, from_openmm, to_openmm
from openmm import unit as omm_unit
from openmmforcefields.generators import SystemGenerator
from openmmtools import multistate
from openmmtools.alchemy import AbsoluteAlchemicalFactory, AlchemicalRegion
from openmmtools.states import (
SamplerState,
ThermodynamicState,
create_thermodynamic_state_protocol,
)
import openfe
from openfe.protocols.openmm_afe.equil_afe_settings import (
AlchemicalSettings,
BaseSolvationSettings,
IntegratorSettings,
MultiStateOutputSettings,
MultiStateSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSystemGeneratorFFSettings,
ThermoSettings,
)
from openfe.protocols.openmm_md.plain_md_methods import PlainMDSimulationUnit
from openfe.protocols.openmm_utils import omm_compute
from openfe.protocols.openmm_utils.omm_settings import SettingsBaseModel
from openfe.protocols.openmm_utils.serialization import deserialize
from openfe.utils import log_system_probe, without_oechem_backend
from ..openmm_utils import (
charge_generation,
multistate_analysis,
settings_validation,
system_creation,
system_validation,
)
from ..openmm_utils.mdtraj_utils import mdtraj_from_openmm
from .utils import SepTopParameterState
logger = logging.getLogger(__name__)
def _pre_equilibrate(
    system: openmm.System,
    topology: openmm.app.Topology,
    positions: omm_unit.Quantity,
    settings: dict[str, SettingsBaseModel],
    endstate: Literal["A", "B", "AB"],
    dry: bool,
    shared_basepath: pathlib.Path,
    platform: openmm.Platform,
    verbose: bool,
    logger,
) -> tuple[omm_unit.Quantity, omm_unit.Quantity]:
    """
    Equilibrate a system with plain (non-alchemical) MD.

    Parameters
    ----------
    system : openmm.System
      An OpenMM System to equilibrate.
    topology : openmm.app.Topology
      OpenMM Topology of the System.
    positions : openmm.unit.Quantity
      Initial positions for the system.
    settings : dict[str, SettingsBaseModel]
      A dictionary of settings objects. Expects the
      following entries:
      * `engine_settings`
      * `thermo_settings`
      * `integrator_settings`
      * `equil_simulation_settings`
      * `equil_output_settings`
    endstate: Literal['A', 'B', 'AB']
      The endstate being pre-equilibrated; it is appended to the
      names of the output files.
    dry: bool
      Whether or not this is a dry run. In a dry run no MD is run and
      the input positions are returned with the System's default box.
    shared_basepath: pathlib.Path
      The Path to the shared storage.
    verbose: bool
      Whether to log extra information.
    logger: logging.getLogger
      The logger that messages are sent to.

    Returns
    -------
    equilibrated_positions : openmm.unit.Quantity
      Positions taken from the final simulation state.
    box : openmm.unit.Quantity
      Box vectors of the equilibrated system.

    Raises
    ------
    ValueError
      If, outside of a dry run, ``endstate`` is not 'A', 'B' or 'AB'.
    """
    thermo_settings = settings["thermo_settings"]
    integrator_settings = settings["integrator_settings"]

    # Prep the simulation object
    integrator = openmm.LangevinMiddleIntegrator(
        to_openmm(thermo_settings.temperature),
        to_openmm(integrator_settings.langevin_collision_rate),
        to_openmm(integrator_settings.timestep),
    )
    simulation = openmm.app.Simulation(
        topology=topology,
        system=system,
        integrator=integrator,
        platform=platform,
    )

    md_settings = settings["equil_simulation_settings"]

    def _to_steps(length):
        # Convert a simulation length into a number of integrator steps
        return settings_validation.get_simsteps(
            sim_length=length,
            timestep=integrator_settings.timestep,
            mc_steps=1,
        )

    # The NVT equilibration stage is optional
    if md_settings.equilibration_length_nvt is None:
        equil_steps_nvt = None
    else:
        equil_steps_nvt = _to_steps(md_settings.equilibration_length_nvt)
    equil_steps_npt = _to_steps(md_settings.equilibration_length)
    prod_steps_npt = _to_steps(md_settings.production_length)

    if verbose:
        logger.info("running non-alchemical equilibration MD")

    # A dry run stops here and hands the inputs straight back
    if dry:
        default_box = system.getDefaultPeriodicBoxVectors()
        return positions, to_openmm(from_openmm(default_box))

    # TODO: Refactor this part to live outside the method call
    # Each end state gets its own output files, so the state label is
    # appended to every output name that is set
    output_settings = settings["equil_output_settings"].unfrozen_copy()

    if endstate not in ("A", "B", "AB"):
        errmsg = f"Only 'A', 'B', and 'AB' are accepted as endstates. Got {endstate}"
        raise ValueError(errmsg)

    name_suffixes = {
        "production_trajectory_filename": f"_state{endstate}.xtc",
        "preminimized_structure": f"_state{endstate}.pdb",
        "minimized_structure": f"_state{endstate}.pdb",
        "equil_nvt_structure": f"_state{endstate}.pdb",
        "equil_npt_structure": f"_state{endstate}.pdb",
        "log_output": f"_state{endstate}.log",
    }
    for attribute, suffix in name_suffixes.items():
        # Unset (falsy) output names are left untouched
        if getattr(output_settings, attribute):
            setattr(output_settings, attribute, getattr(output_settings, attribute) + suffix)

    # Use the _run_MD method from the PlainMDSimulationUnit
    # Should in-place modify the simulation
    PlainMDSimulationUnit._run_MD(
        simulation=simulation,
        positions=positions,
        simulation_settings=md_settings,
        output_settings=output_settings,
        temperature=thermo_settings.temperature,
        barostat_frequency=integrator_settings.barostat_frequency,
        timestep=integrator_settings.timestep,
        equil_steps_nvt=equil_steps_nvt,
        equil_steps_npt=equil_steps_npt,
        prod_steps=prod_steps_npt,
        verbose=verbose,
        shared_basepath=shared_basepath,
    )

    final_state = simulation.context.getState(
        getPositions=True,
    )
    equilibrated_positions = final_state.getPositions(asNumpy=True)
    final_box = final_state.getPeriodicBoxVectors()

    # cautiously delete out contexts & integrator
    del simulation.context, integrator

    return equilibrated_positions, to_openmm(from_openmm(final_box))
class SepTopUnitMixin:
    """
    Shared helper methods for the SepTop ProtocolUnits.
    """

    def _prepare(
        self,
        verbose: bool,
        scratch_basepath: pathlib.Path | None,
        shared_basepath: pathlib.Path | None,
    ):
        """
        Record the verbosity and the base paths on the unit.

        Parameters
        ----------
        verbose : bool
          Verbose output of the simulation progress. Output is provided via
          INFO level logging.
        scratch_basepath : pathlib.Path | None
          Optional base path to write scratch files to.
        shared_basepath : pathlib.Path | None
          Optional base path to write shared files to.
        """
        self.verbose = verbose

        # A missing base path falls back to the current directory
        self.scratch_basepath = (
            pathlib.Path(".") if scratch_basepath is None else scratch_basepath
        )
        self.shared_basepath = (
            pathlib.Path(".") if shared_basepath is None else shared_basepath
        )

    @abc.abstractmethod
    def _get_settings(self) -> dict[str, SettingsBaseModel]:
        """
        Return the settings objects needed by the unit, keyed by name.

        The dictionary has the following entries:
          * forcefield_settings : OpenMMSystemGeneratorFFSettings
          * thermo_settings : ThermoSettings
          * solvation_settings : BaseSolvationSettings
          * alchemical_settings : AlchemicalSettings
          * lambda_settings : LambdaSettings
          * engine_settings : OpenMMEngineSettings
          * integrator_settings : IntegratorSettings
          * equil_simulation_settings : MDSimulationSettings
          * equil_output_settings : MDOutputSettings
          * simulation_settings : MultiStateSimulationSettings
          * output_settings : MultiStateOutputSettings

        Which settings are used depends on the type of simulation being
        run; implementations pick the relevant ones and return them for
        later use, adding any validation checks that are necessary.

        Note
        ----
        Must be implemented in the child class.
        """
        ...

    @staticmethod
    def _verify_execution_environment(
        setup_outputs: dict[str, Any],
    ) -> None:
        """
        Check that the library versions recorded by the setup unit still
        match the ones installed in the running Python environment.

        Raises
        ------
        ProtocolUnitExecutionError
          If a recorded version differs from the installed one, or if a
          version entry is missing from ``setup_outputs``.
        """
        recorded_versions = (
            (gufe, "gufe_version"),
            (openfe, "openfe_version"),
            (openmm, "openmm_version"),
        )

        try:
            # Lazy evaluation: stops at the first library that differs
            environment_changed = any(
                library.__version__ != setup_outputs[key] for library, key in recorded_versions
            )
        except KeyError as e:
            errmsg = "Missing environment information from setup outputs."
            raise ProtocolUnitExecutionError(errmsg) from e

        if environment_changed:
            errmsg = "Python environment has changed, cannot continue Protocol execution."
            raise ProtocolUnitExecutionError(errmsg)
class BaseSepTopSetupUnit(gufe.ProtocolUnit, SepTopUnitMixin):
"""
Base class for the setup of ligand SepTop RBFE free energy transformations.
"""
def _get_alchemical_system(
    self,
    system: openmm.System,
    alchem_indices_A: list[int],
    alchem_indices_B: list[int],
    alchemical_settings: AlchemicalSettings,
) -> tuple[AbsoluteAlchemicalFactory, openmm.System]:
    """
    Get an alchemically modified system and its associated factory

    Two alchemical regions, named "A" and "B", are created with
    identical softcore and annihilation options, one per ligand.

    Parameters
    ----------
    system : openmm.System
      System to alchemically modify.
    alchem_indices_A : list[int]
      A list of atom indices for the alchemically modified
      ligand A in the system.
    alchem_indices_B : list[int]
      A list of atom indices for the alchemically modified
      ligand B in the system.
    alchemical_settings : AlchemicalSettings
      Settings controlling how the alchemical system will be built.

    Returns
    -------
    alchemical_factory : AbsoluteAlchemicalFactory
      Factory for creating an alchemically modified system.
    alchemical_system : openmm.System
      Alchemically modified system
    """
    alchemical_factory = AbsoluteAlchemicalFactory(
        consistent_exceptions=False,
        # The factory is part of openmmtools, so the switch width is given
        # in OpenMM units rather than as an openff (pint) quantity
        switch_width=1.0 * omm_unit.angstroms,
        alchemical_pme_treatment="exact",
        alchemical_rf_treatment="switched",
        disable_alchemical_dispersion_correction=alchemical_settings.disable_alchemical_dispersion_correction,
        split_alchemical_forces=True,
    )

    # Both ligands share the same alchemical options; only the atom
    # indices and the region name differ.
    shared_region_options = {
        "softcore_alpha": alchemical_settings.softcore_alpha,
        "annihilate_electrostatics": True,
        "annihilate_sterics": alchemical_settings.annihilate_sterics,
        "softcore_a": alchemical_settings.softcore_a,
        "softcore_b": alchemical_settings.softcore_b,
        "softcore_c": alchemical_settings.softcore_c,
        "softcore_beta": 0.0,
        "softcore_d": 1.0,
        "softcore_e": 1.0,
        "softcore_f": 2.0,
    }

    # Alchemical Region for ligand A
    alchemical_region_A = AlchemicalRegion(
        alchemical_atoms=alchem_indices_A,
        name="A",
        **shared_region_options,
    )

    # Alchemical Region for ligand B
    alchemical_region_B = AlchemicalRegion(
        alchemical_atoms=alchem_indices_B,
        name="B",
        **shared_region_options,
    )

    alchemical_system = alchemical_factory.create_alchemical_system(
        system, [alchemical_region_A, alchemical_region_B]
    )

    return alchemical_factory, alchemical_system
@abc.abstractmethod
def _get_components(
    self,
) -> tuple[
    dict[str, list[Component]],
    Optional[gufe.SolventComponent],
    Optional[gufe.ProteinComponent],
    dict[SmallMoleculeComponent, OFFMolecule],
]:
    """
    Collect the components the alchemical system is created from.

    Returns
    -------
    A four-membered tuple, typed as: a dictionary of component lists
    keyed by name, an optional solvent component, an optional protein
    component, and a dictionary of OpenFF Molecules keyed by
    SmallMoleculeComponent.

    Note
    ----
    Must be implemented in the child class.
    """
    ...
def _get_system_generator(
    self,
    settings: dict[str, SettingsBaseModel],
    solvent_comp: Optional[SolventComponent],
) -> SystemGenerator:
    """
    Build the SystemGenerator for this unit via the system creation
    utilities.

    Parameters
    ----------
    settings : dict[str, SettingsBaseModel]
        Settings objects for the unit. The ``"output_settings"``,
        ``"forcefield_settings"``, ``"integrator_settings"`` and
        ``"thermo_settings"`` entries are read here.
    solvent_comp : Optional[SolventComponent]
        The solvent component of this system, if there is one. Only its
        presence (``None`` or not) is used.

    Returns
    -------
    system_generator : openmmforcefields.generator.SystemGenerator
        System Generator to parameterise this unit.
    """
    # An optional force field cache file lives under the shared directory
    cache_path = settings["output_settings"].forcefield_cache
    if cache_path is not None:
        cache_path = self.shared_basepath / cache_path

    # The oechem backend is blocked while the generator is built to avoid
    # smiles roundtripping issues between rdkit and oechem
    with without_oechem_backend():
        return system_creation.get_system_generator(
            forcefield_settings=settings["forcefield_settings"],
            integrator_settings=settings["integrator_settings"],
            thermo_settings=settings["thermo_settings"],
            cache=cache_path,
            has_solvent=solvent_comp is not None,
        )
@staticmethod
def _assign_partial_charges(
    partial_charge_settings: OpenFFPartialChargeSettings,
    smc_components: dict[SmallMoleculeComponent, OFFMolecule],
) -> None:
    """
    Assign partial charges, in place, to every OpenFF Molecule given.

    Parameters
    ----------
    partial_charge_settings : OpenFFPartialChargeSettings
        Settings for controlling how the partial charges are assigned.
    smc_components : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
        Dictionary of OpenFF Molecules to charge, keyed by
        SmallMoleculeComponent. Only the values are used.
    """
    # The same charge options apply to every molecule; ``overwrite=False``
    # is always passed to the charge assignment helper.
    charge_options = {
        "overwrite": False,
        "method": partial_charge_settings.partial_charge_method,
        "toolkit_backend": partial_charge_settings.off_toolkit_backend,
        "generate_n_conformers": partial_charge_settings.number_of_conformers,
        "nagl_model": partial_charge_settings.nagl_model,
    }

    for offmol in smc_components.values():
        charge_generation.assign_offmol_partial_charges(offmol=offmol, **charge_options)
def _get_modeller(
    self,
    protein_component: Optional[ProteinComponent],
    solvent_component: SolventComponent,
    smc_components: dict[SmallMoleculeComponent, OFFMolecule],
    system_generator: SystemGenerator,
    solvation_settings: BaseSolvationSettings,
) -> tuple[openmm.app.Modeller, dict[Component, npt.NDArray]]:
    """
    Create an OpenMM Modeller for the system together with the residue
    indices belonging to each component.

    Parameters
    ----------
    protein_component : Optional[ProteinComponent]
        Protein Component, if it exists.
    solvent_component : SolventComponent
        Solvent Component.
    smc_components : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
        Dictionary of OpenFF Molecules to add, keyed by
        SmallMoleculeComponent.
    system_generator : openmmforcefields.generator.SystemGenerator
        System Generator to parameterise this unit.
    solvation_settings : BaseSolvationSettings
        Settings detailing how to solvate the system.

    Returns
    -------
    system_modeller : openmm.app.Modeller
        OpenMM Modeller object generated from ProteinComponent and
        OpenFF Molecules.
    comp_resids : dict[Component, npt.NDArray]
        Dictionary of residue indices for each component in system.
    """
    if self.verbose:
        self.logger.info("Parameterizing molecules")

    # TODO: guard the following from non-RDKit backends
    # Each small molecule is pushed through the generator up front: the
    # force field parameters have to exist before the system is solvated.
    # The oechem backend is blocked to avoid smiles roundtripping issues
    # between rdkit and oechem.
    with without_oechem_backend():
        for offmol in smc_components.values():
            mol_topology = offmol.to_topology().to_openmm()
            system_generator.create_system(mol_topology, molecules=[offmol])

    # Modeller plus the residue indices of every component
    modeller, resids_by_component = system_creation.get_omm_modeller(
        protein_comp=protein_component,
        solvent_comp=solvent_component,
        small_mols=smc_components,
        omm_forcefield=system_generator.forcefield,
        solvent_settings=solvation_settings,
    )
    return modeller, resids_by_component
def _get_omm_objects(
    self,
    system_modeller: openmm.app.Modeller,
    system_generator: SystemGenerator,
    smc_components: list[OFFMolecule],
) -> tuple[openmm.app.Topology, openmm.System, openmm.unit.Quantity]:
    """
    Get the OpenMM Topology, System and Positions of the
    parameterised system.

    Parameters
    ----------
    system_modeller : openmm.app.Modeller
        OpenMM Modeller object representing the system to be
        parametrized.
    system_generator : SystemGenerator
        The SystemGenerator object to create a System with.
    smc_components : list[openff.toolkit.Molecule]
        A list of openff Molecules to add to the system.

    Returns
    -------
    topology : openmm.app.Topology
        Topology object describing the parameterized system
    system : openmm.System
        An OpenMM System of the alchemical system.
    positions : openmm.unit.Quantity
        Positions of the system.

    Notes
    -----
    The tuple is returned in the order ``(topology, system, positions)``;
    the return annotation reflects that order.
    """
    topology = system_modeller.getTopology()
    # roundtrip positions to remove vec3 issues
    positions = to_openmm(from_openmm(system_modeller.getPositions()))

    # Block out oechem backend to avoid any issues with
    # smiles roundtripping between rdkit and oechem
    with without_oechem_backend():
        system = system_generator.create_system(
            topology,
            molecules=smc_components,
        )

    return topology, system, positions
@staticmethod
def _get_atom_indices(
    omm_topology: openmm.app.Topology,
    comp_resids: dict[Component, npt.NDArray],
) -> dict[Component, list]:
    """
    Get all the atom indices for each component in the system, based on
    the dictionary of residue indices for each component.

    Parameters
    ----------
    omm_topology: openmm.app.Topology
        OpenMM Topology object with the full system.
    comp_resids: dict[Component, npt.NDArray]
        Dictionary of the components in the topology with their residue indices.

    Returns
    -------
    comp_atomids: dict[Component, list]
        A dictionary of atom indices for each component in the System.
        Atom indices are listed in topology residue order.
    """
    import numpy as np

    # Walk the topology a single time and remember the atoms of each
    # residue, rather than re-walking every residue per component.
    atoms_by_residue = [
        (residue.index, [atom.index for atom in residue.atoms()])
        for residue in omm_topology.residues()
    ]

    comp_atomids = {}
    for component, resids in comp_resids.items():
        # A set makes the per-residue membership test constant time
        wanted = set(np.asarray(resids).ravel().tolist())
        comp_atomids[component] = [
            atom_index
            for residue_index, atom_indices in atoms_by_residue
            if residue_index in wanted
            for atom_index in atom_indices
        ]
    return comp_atomids
@staticmethod
def get_smc_comps(
alchem_comps: dict[str, list[Component]],
smc_comps: dict[SmallMoleculeComponent, OFFMolecule],
) -> tuple[
dict[SmallMoleculeComponent, OFFMolecule],
dict[SmallMoleculeComponent, OFFMolecule],
dict[SmallMoleculeComponent, OFFMolecule],
]:
# Get smcs for the different states and the common smcs
smc_off_A = {m: m.to_openff() for m in alchem_comps["stateA"]}
smc_off_B = {m: m.to_openff() for m in alchem_comps["stateB"]}
# Common smcs could e.g. be cofactors
smc_off_both = {
m: m.to_openff()
for m in smc_comps
if (m not in alchem_comps["stateA"] and m not in alchem_comps["stateB"])
}
smc_comps_A = smc_off_A | smc_off_both
smc_comps_B = smc_off_B | smc_off_both
smc_comps_AB = smc_off_A | smc_off_B | smc_off_both
return smc_comps_A, smc_comps_B, smc_comps_AB
def get_system(
    self,
    solv_comp: SolventComponent,
    prot_comp: ProteinComponent,
    smc_comp: dict[SmallMoleculeComponent, OFFMolecule],
    settings: dict[str, SettingsBaseModel],
):
    """
    Build the OpenMM system, topology, positions and modeller, along with
    the residue IDs of the different components.

    Parameters
    ----------
    solv_comp: SolventComponent
        The solvent component.
    prot_comp: Optional[ProteinComponent]
        The protein component, if any.
    smc_comp: dict[SmallMoleculeComponent,OFFMolecule]
        OpenFF Molecules keyed by SmallMoleculeComponent.
    settings: dict[str, SettingsBaseModel]
        A dictionary of settings object for the unit.

    Returns
    -------
    omm_system: openmm.System
    omm_topology: openmm.app.Topology
    positions: openmm.unit.Quantity
    system_modeller: openmm.app.Modeller
    comp_resids: dict[Component, npt.NDArray]
        A dictionary of residues for each component in the System.
    """
    # The same generator is used for the modeller and the final system
    generator = self._get_system_generator(settings, solv_comp)

    modeller, comp_resids = self._get_modeller(
        prot_comp,
        solv_comp,
        smc_comp,
        generator,
        settings["solvation_settings"],
    )

    # _get_omm_objects hands back (topology, system, positions)
    offmols = list(smc_comp.values())
    omm_topology, omm_system, positions = self._get_omm_objects(modeller, generator, offmols)

    return omm_system, omm_topology, positions, modeller, comp_resids
@staticmethod
def _subsample_topology(
    topology: openmm.app.Topology,
    positions: openmm.unit.Quantity,
    output_selection: str,
    output_file: pathlib.Path,
) -> npt.NDArray:
    """
    Apply the user-selected output selection to the system and write the
    resulting sub-structure to a PDB file.

    Parameters
    ----------
    topology : openmm.app.Topology
        The system topology to subsample.
    positions : openmm.unit.Quantity
        The system positions.
    output_selection : str
        An MDTraj selection string to subsample the topology with.
    output_file : pathlib.Path
        Path to the file to write the PDB to.

    Returns
    -------
    selection_indices : npt.NDArray
        The indices of the subselected system. No PDB is written when
        the selection is empty.
    """
    full_traj = mdtraj_from_openmm(topology, positions)
    selected = full_traj.topology.select(output_selection)

    # Nothing selected: skip the PDB and hand back the empty selection
    if len(selected) == 0:
        return selected

    full_traj.atom_slice(selected).save_pdb(output_file)
    return selected
def _execute(
    self,
    ctx: gufe.Context,
    **kwargs,
) -> dict[str, Any]:
    """
    Run the unit within the given context and package its outputs.

    Parameters
    ----------
    ctx : gufe.Context
        Execution context; its ``scratch`` and ``shared`` paths are used.
    **kwargs
        Accepted but not used here.

    Returns
    -------
    dict[str, Any]
        The repeat id, generation, simulation type and the openmm, openfe
        and gufe versions, merged with everything returned by ``run``
        (entries from ``run`` win on key clashes).
    """
    log_system_probe(logging.INFO, paths=[ctx.scratch])

    run_outputs = self.run(scratch_basepath=ctx.scratch, shared_basepath=ctx.shared)

    results = {
        "repeat_id": self._inputs["repeat_id"],
        "generation": self._inputs["generation"],
        "simtype": self.simtype,
        "openmm_version": openmm.__version__,
        "openfe_version": openfe.__version__,
        "gufe_version": gufe.__version__,
    }
    results.update(run_outputs)
    return results
class BaseSepTopRunUnit(gufe.ProtocolUnit, SepTopUnitMixin):
"""
Base class for running ligand SepTop RBFE free energy transformations.
"""
@staticmethod
def _check_restart(output_settings: SettingsBaseModel, shared_path: pathlib.Path):
    """
    Work out whether this is a restart of an existing simulation.

    Parameters
    ----------
    output_settings : SettingsBaseModel
        The simulation output settings
    shared_path : pathlib.Path
        The shared directory where we should be looking for existing files.

    Returns
    -------
    bool
        ``True`` when both the trajectory and checkpoint files exist,
        ``False`` when neither does.

    Raises
    ------
    IOError
        If one of the trajectory or checkpoint files are present
        without the other.

    Notes
    -----
    For now this just checks if the netcdf files are present in the
    shared directory but in the future this may expand depending on
    how warehouse works.
    """
    has_trajectory = (shared_path / output_settings.output_filename).is_file()
    has_checkpoint = (shared_path / output_settings.checkpoint_storage_filename).is_file()

    # Both present -> restart; both absent -> fresh start
    if has_trajectory == has_checkpoint:
        return has_trajectory

    # Exactly one of the two files exists: refuse to continue
    if has_trajectory:
        detail = "the trajectory file is present but not the checkpoint file. "
    else:
        detail = "the checkpoint file is present but not the trajectory file. "

    raise IOError(
        "Attempting to restart but "
        + detail
        + "This should not happen under normal circumstances."
    )
@abc.abstractmethod
def _get_components(
    self,
) -> tuple[
    dict[str, list[Component]],
    Optional[gufe.SolventComponent],
    Optional[gufe.ProteinComponent],
    dict[SmallMoleculeComponent, OFFMolecule],
]:
    """
    Get the relevant components to create the alchemical system with.

    Returns
    -------
    alchem_comps : dict[str, list[Component]]
        Lists of alchemical components keyed by a string label.
        NOTE(review): presumably keyed by ``"stateA"`` / ``"stateB"`` --
        confirm against the child class implementations.
    solv_comp : Optional[gufe.SolventComponent]
        The solvent component, or ``None`` if there is none.
    prot_comp : Optional[gufe.ProteinComponent]
        The protein component, or ``None`` if there is none.
    smc_comps : dict[SmallMoleculeComponent, OFFMolecule]
        OpenFF Molecules keyed by their SmallMoleculeComponent.

    Note
    ----
    Must be implemented in the child class.
    """
    ...
@abc.abstractmethod
def _get_lambda_schedule(
    self, settings: dict[str, SettingsBaseModel]
) -> dict[str, list[float]]:
    """
    Create the lambda schedule

    Parameters
    ----------
    settings : dict[str, SettingsBaseModel]
        Settings for the unit.

    Returns
    -------
    lambdas : dict[str, list[float]]
        Lists of lambda values keyed by name.
        NOTE(review): presumably one value per alchemical state in each
        list -- confirm against the child class implementations.

    Note
    ----
    Must be implemented in the child class.
    """
    ...
def _get_states(
    self,
    alchemical_system: openmm.System,
    positions: openmm.unit.Quantity,
    box_vectors: Optional[openmm.unit.Quantity],
    settings: dict[str, SettingsBaseModel],
    lambdas: dict[str, list[float]],
    solvent_comp: Optional[SolventComponent],
) -> tuple[list[SamplerState], list[ThermodynamicState]]:
    """
    Build the sampler and thermodynamic states for an input alchemical
    system.

    Parameters
    ----------
    alchemical_system : openmm.System
        Alchemical system to get states for.
    positions : openmm.unit.Quantity
        Positions of the alchemical system.
    box_vectors : Optional[openmm.unit.Quantity]
        Box vectors of the alchemical system. Only applied when the
        system uses periodic boundary conditions.
    settings : dict[str, SettingsBaseModel]
        A dictionary of settings for the protocol unit.
    lambdas : dict[str, list[float]]
        A dictionary of lambda scales.
    solvent_comp : Optional[SolventComponent]
        The solvent component of the system, if there is one. A pressure
        constant is only set when this is not ``None``.

    Returns
    -------
    sampler_states : list[SamplerState]
        One entry per thermodynamic state; every entry refers to the
        same SamplerState object.
    cmp_states : list[ThermodynamicState]
        A list of ThermodynamicState for each replica in the system.
    """
    alchemical_state = SepTopParameterState.from_system(alchemical_system)

    # System constants: temperature always, pressure only with a solvent
    thermo_settings = settings["thermo_settings"]
    temperature = thermo_settings.temperature
    pressure = thermo_settings.pressure

    constants = {"temperature": ensure_quantity(temperature, "openmm")}
    if solvent_comp is not None:
        constants["pressure"] = ensure_quantity(pressure, "openmm")

    cmp_states = create_thermodynamic_state_protocol(
        alchemical_system,
        protocol=lambdas,
        constants=constants,
        composable_states=[alchemical_state],
    )

    shared_state = SamplerState(positions=positions)
    if alchemical_system.usesPeriodicBoundaryConditions():
        shared_state.box_vectors = box_vectors

    return [shared_state] * len(cmp_states), cmp_states
@staticmethod
def _get_integrator(
    integrator_settings: IntegratorSettings,
    simulation_settings: MultiStateSimulationSettings,
    system: openmm.System,
) -> openmmtools.mcmc.LangevinDynamicsMove:
    """
    Create the LangevinDynamicsMove used as the integrator.

    Parameters
    ----------
    integrator_settings : IntegratorSettings
        Settings controlling the Langevin integrator.
    simulation_settings : MultiStateSimulationSettings
        Settings controlling the simulation.
    system: openmm.System
        The OpenMM System being simulated.

    Returns
    -------
    integrator : openmmtools.mcmc.LangevinDynamicsMove
        A configured integrator object.

    Raises
    ------
    ValueError
        If velocities are not reassigned and the system contains at
        least one virtual site.
    """
    n_steps = settings_validation.convert_steps_per_iteration(
        simulation_settings, integrator_settings
    )

    move = openmmtools.mcmc.LangevinDynamicsMove(
        timestep=to_openmm(integrator_settings.timestep),
        collision_rate=to_openmm(integrator_settings.langevin_collision_rate),
        n_steps=n_steps,
        reassign_velocities=integrator_settings.reassign_velocities,
        n_restart_attempts=integrator_settings.n_restart_attempts,
        constraint_tolerance=integrator_settings.constraint_tolerance,
    )

    # With velocity reassignment there is nothing further to validate
    if integrator_settings.reassign_velocities:
        return move

    # Known issue: virtual sites combined with multistate simulations
    # need velocity reassignment
    has_virtual_site = any(
        system.isVirtualSite(index) for index in range(system.getNumParticles())
    )
    if has_virtual_site:
        raise ValueError(
            "Simulations with virtual sites without velocity "
            "reassignments are unstable with MCMC integrators. "
            "You can set `reassign_velocities` to ``True`` in the "
            "`integrator_settings` to avoid this issue."
        )

    return move
@staticmethod
def _get_reporter(
    storage_path: pathlib.Path,
    selection_indices: npt.NDArray,
    simulation_settings: MultiStateSimulationSettings,
    output_settings: MultiStateOutputSettings,
) -> multistate.MultiStateReporter:
    """
    Create the MultiStateReporter for the simulation being run.

    Parameters
    ----------
    storage_path : pathlib.Path
        Path to the directory where files should be written.
    selection_indices : npt.NDArray
        Array of system particle indices to subsample the system by.
    simulation_settings : MultiStateSimulationSettings
        Multistate simulation control settings, specifically containing
        the amount of time per state sampling iteration.
    output_settings: MultiStateOutputSettings
        Output settings for the simulations

    Returns
    -------
    reporter : multistate.MultiStateReporter
        The reporter for the simulation.

    Notes
    -----
    All this does is create the reporter, it works for both
    new reporters and if we are doing a restart.
    """
    time_per_iteration = simulation_settings.time_per_iteration

    def _write_interval(frequency, frequency_name):
        # An unset write frequency maps to an interval of 0
        if frequency is None:
            return 0
        return settings_validation.divmod_time_and_check(
            numerator=frequency,
            denominator=time_per_iteration,
            numerator_name=frequency_name,
            denominator_name="simulation settings' time_per_iteration",
        )

    position_interval = _write_interval(
        output_settings.positions_write_frequency,
        "output settings' position_write_frequency",
    )
    velocity_interval = _write_interval(
        output_settings.velocities_write_frequency,
        "output settings' velocity_write_frequency",
    )
    checkpoint_interval = settings_validation.convert_checkpoint_interval_to_iterations(
        checkpoint_interval=output_settings.checkpoint_interval,
        time_per_iteration=time_per_iteration,
    )

    # openmmtools resolves the checkpoint file relative to the location
    # of the nc file, so only the bare filename is passed for it
    return multistate.MultiStateReporter(
        storage=storage_path / output_settings.output_filename,
        analysis_particle_indices=selection_indices,
        checkpoint_interval=checkpoint_interval,
        checkpoint_storage=output_settings.checkpoint_storage_filename,
        position_interval=position_interval,
        velocity_interval=velocity_interval,
    )
@staticmethod
def _get_sampler(
    integrator: openmmtools.mcmc.LangevinDynamicsMove,
    reporter: openmmtools.multistate.MultiStateReporter,
    simulation_settings: MultiStateSimulationSettings,
    thermodynamic_settings: ThermoSettings,
    compound_states: list[ThermodynamicState],
    sampler_states: list[SamplerState],
    platform: openmm.Platform,
    restart: bool,
) -> multistate.MultiStateSampler:
    """
    Get a sampler based on the equilibrium sampling method requested.

    Parameters
    ----------
    integrator : openmmtools.mcmc.LangevinDynamicsMove
        The simulation integrator.
    reporter : openmmtools.multistate.MultiStateReporter
        The reporter to hook up to the sampler.
    simulation_settings : MultiStateSimulationSettings
        Settings for the alchemical sampler.
    thermodynamic_settings : ThermoSettings
        Thermodynamic settings
    compound_states : list[ThermodynamicState]
        A list of thermodynamic states to sample.
    sampler_states : list[SamplerState]
        A list of sampler states.
    platform : openmm.Platform
        The compute platform to use.
    restart : bool
        If we are restarting the simulation.

    Returns
    -------
    sampler : multistate.MultistateSampler
        A sampler configured for the chosen sampling method.

    Raises
    ------
    AttributeError
        If the requested sampler method is not one of ``"repex"``,
        ``"sams"`` or ``"independent"`` (compared case-insensitively).
    ValueError
        On restart, if a composable-state parameter of a stored
        thermodynamic state differs from the expected one, or if the
        number of states/replicas, the steps per iteration or the
        timestep of the stored sampler do not match the current inputs.

    Notes
    -----
    On restart the sampler is rebuilt from the reporter's storage only;
    the keyword arguments assembled from the settings are used solely
    when a new sampler is created.
    """
    # Map of supported sampler method names to their sampler classes
    _SAMPLERS = {
        "repex": multistate.ReplicaExchangeSampler,
        "sams": multistate.SAMSSampler,
        "independent": multistate.MultiStateSampler,
    }

    sampler_method = simulation_settings.sampler_method.lower()

    try:
        sampler_class = _SAMPLERS[sampler_method]
    except KeyError:
        errmsg = f"Unknown sampler {sampler_method}"
        raise AttributeError(errmsg)

    # Get the real time analysis values to use
    rta_its, rta_min_its = settings_validation.convert_real_time_analysis_iterations(
        simulation_settings=simulation_settings,
    )

    # Get the number of production iterations to run for
    steps_per_iteration = integrator.n_steps
    timestep = from_openmm(integrator.timestep)
    number_of_iterations = int(
        settings_validation.get_simsteps(
            sim_length=simulation_settings.production_length,
            timestep=timestep,
            mc_steps=steps_per_iteration,
        )
        / steps_per_iteration
    )

    # convert early_termination_target_error from kcal/mol to kT
    early_termination_target_error = (
        settings_validation.convert_target_error_from_kcal_per_mole_to_kT(
            thermodynamic_settings.temperature,
            simulation_settings.early_termination_target_error,
        )
    )

    # Arguments shared by all sampler classes
    sampler_kwargs = {
        "mcmc_moves": integrator,
        "online_analysis_interval": rta_its,
        "online_analysis_target_error": early_termination_target_error,
        "online_analysis_minimum_iterations": rta_min_its,
        "number_of_iterations": number_of_iterations,
    }

    # Method-specific extras
    if sampler_method == "sams":
        sampler_kwargs |= {
            "flatness_criteria": simulation_settings.sams_flatness_criteria,
            "gamma0": simulation_settings.sams_gamma0,
        }

    if sampler_method == "repex":
        sampler_kwargs |= {
            "replica_mixing_scheme": "swap-all",
        }

    if restart:
        # Rebuild the sampler from what the reporter has stored
        sampler = sampler_class.from_storage(reporter)

        # We do some checks to make sure we are running the same system
        # including ensuring that we have the same thermodynamic parameters and
        # that the lambda schedule is the same.
        for index, thermostate in enumerate(sampler._thermodynamic_states):
            system_validation.assert_multistate_system_equality(
                ref_system=compound_states[index].get_system(remove_thermostat=True),
                stored_system=thermostate.get_system(remove_thermostat=True),
            )

            # Loop over each composable state (e.g. GlobalParameterState object)
            # get the parameters and check that the values are the same.
            for composable_state in compound_states[index]._composable_states:
                for param in composable_state._parameters:
                    expected = getattr(compound_states[index], param)
                    stored = getattr(thermostate, param)
                    if expected != stored:
                        errmsg = (
                            f"System parameter {param} in checkpoint does "
                            "not match protocol system, cannot resume"
                        )
                        raise ValueError(errmsg)

        # The stored replica/state counts and the first MCMC move must
        # also agree with the current settings and integrator
        if (
            (simulation_settings.n_replicas != sampler.n_states)
            or (simulation_settings.n_replicas != sampler.n_replicas)
            or (sampler.mcmc_moves[0].n_steps != steps_per_iteration)
            or (sampler.mcmc_moves[0].timestep != integrator.timestep)
        ):
            errmsg = "System in checkpoint does not match protocol system, cannot resume"
            raise ValueError(errmsg)
    else:
        # Fresh simulation: build the sampler and create its storage
        sampler = sampler_class(**sampler_kwargs)

        sampler.create(
            thermodynamic_states=compound_states,
            sampler_states=sampler_states,
            storage=reporter,
        )

    # Get and set the context caches
    sampler.energy_context_cache = openmmtools.cache.ContextCache(
        capacity=None,
        time_to_live=None,
        platform=platform,
    )
    sampler.sampler_context_cache = openmmtools.cache.ContextCache(
        capacity=None,
        time_to_live=None,
        platform=platform,
    )

    return sampler
def _run_simulation(
    self,
    sampler: multistate.MultiStateSampler,
    reporter: multistate.MultiStateReporter,
    settings: dict[str, SettingsBaseModel],
    dry: bool,
):
    """
    Run the simulation, or clean up after a dry run.

    Parameters
    ----------
    sampler : multistate.MultiStateSampler
        The sampler associated with the simulation to run.
    reporter : multistate.MultiStateReporter
        The reporter associated with the sampler.
    settings : dict[str, SettingsBaseModel]
        The dictionary of settings for the protocol.
    dry : bool
        Whether or not to dry run the simulation

    Notes
    -----
    Nothing is returned. In a dry run the reporter is closed and the
    trajectory and checkpoint files in the shared directory are deleted.
    Otherwise minimization and equilibration are only carried out when
    the sampler is at iteration 0, and production runs for the
    iterations that remain.
    """
    simulation_settings = settings["simulation_settings"]
    integrator_settings = settings["integrator_settings"]

    # Get the relevant simulation steps
    mc_steps = settings_validation.convert_steps_per_iteration(
        simulation_settings=simulation_settings,
        integrator_settings=integrator_settings,
    )

    def _steps_for(sim_length):
        # Total number of integration steps for a simulation length
        return settings_validation.get_simsteps(
            sim_length=sim_length,
            timestep=integrator_settings.timestep,
            mc_steps=mc_steps,
        )

    equil_steps = _steps_for(simulation_settings.equilibration_length)
    prod_steps = _steps_for(simulation_settings.production_length)

    def _announce(message):
        # Progress messages are only emitted in verbose mode
        if self.verbose:
            self.logger.info(message)

    if not dry:  # pragma: no-cover
        if sampler._iteration == 0:
            _announce("minimizing systems")
            sampler.minimize(max_iterations=simulation_settings.minimization_steps)

            _announce("equilibrating systems")
            sampler.equilibrate(int(equil_steps / mc_steps))

        _announce("running production phase")

        # `run` is used so that the sampler stays limited by the number
        # of iterations it was built with; iterations already completed
        # are subtracted from the production total.
        remaining_iterations = int(prod_steps / mc_steps) - sampler._iteration
        sampler.run(n_iterations=remaining_iterations)

        _announce("production phase complete")
    else:
        # The reporter is closed first to prevent file handle clashes
        reporter.close()

        # Then the reporter files are removed
        output_settings = settings["output_settings"]
        leftover_files = [
            self.shared_basepath / output_settings.output_filename,
            self.shared_basepath / output_settings.checkpoint_storage_filename,
        ]
        for leftover in leftover_files:
            leftover.unlink()
def run(
    self,
    system: openmm.System,
    pdb_file: openmm.app.pdbfile.PDBFile,
    selection_indices: npt.NDArray,
    dry: bool = False,
    verbose: bool = True,
    scratch_basepath: pathlib.Path | None = None,
    shared_basepath: pathlib.Path | None = None,
) -> dict[str, Any]:
    """
    Run the simulation part of the SepTop protocol.

    Parameters
    ----------
    system: openmm.System
        System used for the SepTop calculation.
    pdb_file: openmm.app.pdbfile.PDBFile
        OpenMM PDBFile object representing the SepTop System.
    selection_indices: npt.NDArray
        The indices of the particles to output in the trajectory.
    dry: bool
        Do a dry run of the calculation, creating all necessary alchemical
        system components (topology, system, sampler, etc...) but without
        running the simulation, default False
    verbose: bool
        Verbose output of the simulation progress. Output is provided via
        INFO level logging, default True
    scratch_basepath : pathlib.Path | None
        Path to the scratch (temporary) directory space.
    shared_basepath : pathlib.Path | None
        Path to the shared (persistent) directory space.

    Returns
    -------
    dict: dict[str, Any]
        If ``dry`` is False: the paths to the simulation files under the
        keys ``"trajectory"`` and ``"checkpoint"``.
        If ``dry`` is True: the debug objects under the keys
        ``"sampler"``, ``"integrator"`` and ``"equil_positions"``.
    """
    # 0. General preparation tasks
    self._prepare(verbose, scratch_basepath, shared_basepath)

    if self.verbose:
        self.logger.info("Running the SepTop simulation.")

    # Get settings, components, and positions
    # (of the components, only the solvent component is used below)
    settings = self._get_settings()
    alchem_comps, solv_comp, prot_comp, smc_comps = self._get_components()
    positions = pdb_file.getPositions(asNumpy=True)

    # Check for a restart
    self.restart = self._check_restart(
        output_settings=settings["output_settings"],
        shared_path=self.shared_basepath,
    )

    # Get the compute platform
    platform = omm_compute.get_openmm_platform(
        platform_name=settings["engine_settings"].compute_platform,
        gpu_device_index=settings["engine_settings"].gpu_device_index,
        restrict_cpu_count=False,
    )

    # Check that the restraints are correctly applied by running a short equilibration
    if not self.restart:
        equil_positions, box_AB = _pre_equilibrate(
            system=system,
            topology=pdb_file.topology,
            positions=positions,
            settings=settings,
            endstate="AB",
            dry=dry,
            shared_basepath=self.shared_basepath,
            platform=platform,
            verbose=self.verbose,
            logger=self.logger,
        )
    else:
        # If we are doing a restart, we will be using the positions
        # in the existing checkpoint file and equil_positions is purely
        # used to create the sampler & compound states (only used for
        # checking the sampler being created in restarts).
        # For the sake of simplicity, we assign equil_positions to
        # the PDB positions and box_AB to the system vectors
        equil_positions = positions
        box_AB = system.getDefaultPeriodicBoxVectors()

    # Get the lambda schedule
    lambdas = self._get_lambda_schedule(settings)

    # Get compound and sampler states
    sampler_states, cmp_states = self._get_states(
        alchemical_system=system,
        positions=equil_positions,
        box_vectors=box_AB,
        settings=settings,
        lambdas=lambdas,
        solvent_comp=solv_comp,
    )

    # Get the integrator
    integrator = self._get_integrator(
        integrator_settings=settings["integrator_settings"],
        simulation_settings=settings["simulation_settings"],
        system=system,
    )

    # Wrap in try/finally to avoid memory leak issues
    try:
        # Get the reporter
        reporter = self._get_reporter(
            storage_path=self.shared_basepath,
            selection_indices=selection_indices,
            simulation_settings=settings["simulation_settings"],
            output_settings=settings["output_settings"],
        )

        # Get the sampler
        sampler = self._get_sampler(
            integrator=integrator,
            reporter=reporter,
            simulation_settings=settings["simulation_settings"],
            thermodynamic_settings=settings["thermo_settings"],
            compound_states=cmp_states,
            sampler_states=sampler_states,
            platform=platform,
            restart=self.restart,
        )

        # Run the simulation (or clean up, in a dry run)
        self._run_simulation(
            sampler,
            reporter,
            settings,
            dry,
        )
    finally:
        # Have to wrap this in a try/except, because we might
        # be in a situation where the reporter and sampler weren't created
        try:
            # Order is reporter, contexts, sampler, integrator
            reporter.close()

            # clear GPU context
            # Note: use cache.empty() when openmmtools #690 is resolved
            for context in list(sampler.energy_context_cache._lru._data.keys()):
                del sampler.energy_context_cache._lru._data[context]
            for context in list(sampler.sampler_context_cache._lru._data.keys()):
                del sampler.sampler_context_cache._lru._data[context]
            # cautiously clear out the global context cache too
            for context in list(openmmtools.cache.global_context_cache._lru._data.keys()):
                del openmmtools.cache.global_context_cache._lru._data[context]

            del sampler.sampler_context_cache, sampler.energy_context_cache

            # Keep these around in a dry run so we can inspect things
            if not dry:
                # At this point we know the sampler exists, so we del the integrator
                # first since it's associated with the sampler
                del integrator, sampler
        except UnboundLocalError:
            # reporter and/or sampler were never bound; nothing to clean up
            pass

    if not dry:
        nc = self.shared_basepath / settings["output_settings"].output_filename
        chk = self.shared_basepath / settings["output_settings"].checkpoint_storage_filename
        return {
            "trajectory": nc,
            "checkpoint": chk,
        }
    else:
        return {
            "sampler": sampler,
            "integrator": integrator,
            "equil_positions": equil_positions,
        }
def _execute(
    self,
    ctx: gufe.Context,
    *,
    setup,
    **kwargs,
) -> dict[str, Any]:
    """
    Run the simulation unit from the outputs of the setup unit.

    Parameters
    ----------
    ctx : gufe.Context
        Execution context; its ``scratch`` and ``shared`` paths are used.
    setup
        Result of the setup unit. Its ``outputs`` must provide the
        ``"system"``, ``"topology"`` and ``"selection_indices"`` entries.
    **kwargs
        Accepted but not used here.

    Returns
    -------
    dict[str, Any]
        The repeat id, generation and simulation type, merged with
        everything returned by ``run`` (entries from ``run`` win on key
        clashes).
    """
    log_system_probe(logging.INFO, paths=[ctx.scratch])

    # Ensure the environment hasn't changed
    self._verify_execution_environment(setup.outputs)

    # Rebuild the inputs of `run` from what the setup unit stored
    setup_outputs = setup.outputs
    run_outputs = self.run(
        system=deserialize(setup_outputs["system"]),
        pdb_file=openmm.app.pdbfile.PDBFile(str(setup_outputs["topology"])),
        selection_indices=setup_outputs["selection_indices"],
        scratch_basepath=ctx.scratch,
        shared_basepath=ctx.shared,
    )

    results = {
        "repeat_id": self._inputs["repeat_id"],
        "generation": self._inputs["generation"],
        "simtype": self.simtype,
    }
    results.update(run_outputs)
    return results
class BaseSepTopAnalysisUnit(gufe.ProtocolUnit, SepTopUnitMixin):
@staticmethod
def _analyze_multistate_energies(
    trajectory: pathlib.Path,
    checkpoint: pathlib.Path,
    sampler_method: str,
    output_directory: pathlib.Path,
    dry: bool,
):
    """
    Analyze multistate energies and generate plots.

    Parameters
    ----------
    trajectory : pathlib.Path
        Path to the NetCDF trajectory file.
    checkpoint : pathlib.Path
        The name of the checkpoint file. Note this is
        relative in path to the trajectory file.
    sampler_method : str
        The multistate sampler method used.
    output_directory : pathlib.Path
        The path to where plots will be written.
    dry : bool
        Whether or not we are running a dry run.

    Returns
    -------
    The analyzer's ``unit_results_dict``.

    Notes
    -----
    The analyzer and the reporter are closed even if the analysis or
    plotting raises, so the storage files are not left open on failure.
    """
    reporter = multistate.MultiStateReporter(
        storage=trajectory,
        # Note: openmmtools only wants the name of the checkpoint
        # file, it assumes it to be in the same place as the trajectory
        checkpoint_storage=checkpoint.name,
        open_mode="r",
    )

    try:
        analyzer = multistate_analysis.MultistateEquilFEAnalysis(
            reporter=reporter,
            sampling_method=sampler_method,
            result_units=offunit.kilocalorie_per_mole,
        )

        try:
            # Only create plots when not doing a dry run
            if not dry:
                analyzer.plot(filepath=output_directory, filename_prefix="")
        finally:
            analyzer.close()
    finally:
        # Closed after the analyzer, matching the original ordering
        reporter.close()

    return analyzer.unit_results_dict
def run(
self,
*,
trajectory: pathlib.Path,
checkpoint: pathlib.Path,
dry: bool = False,
verbose: bool = True,
scratch_basepath: pathlib.Path | None = None,
shared_basepath: pathlib.Path | None = None,
) -> dict[str, Any]:
"""Analyze the multistate simulation.
Parameters
----------
trajectory : pathlib.Path
Path to the MultiStateReporter generated NetCDF file.
checkpoint : pathlib.Path
Path to the checkpoint file generated by MultiStateReporter.
dry : bool
Do a dry run of the calculation, creating all necessary hybrid
system components (topology, system, sampler, etc...) but without
running the simulation.
verbose : bool
Verbose output of the simulation progress. Output is provided via
INFO level logging.
scratch_basepath: pathlib.Path | None
Where to store temporary files, defaults to current working directory
shared_basepath : pathlib.Path | None
Where to run the calculation, defaults to current working directory
Returns
-------
dict
Outputs created in the basepath directory or the debug objects
(i.e. sampler) if ``dry==True``.
"""
# Prepare paths & verbosity
self._prepare(verbose, scratch_basepath, shared_basepath)
if self.verbose:
self.logger.info("Starting simulation analysis unit")
# Get the settings
settings = self._get_settings()
# Energies analysis
if verbose:
self.logger.info("Analyzing energies")
energy_analysis = self._analyze_multistate_energies(
trajectory=trajectory,
checkpoint=checkpoint,
sampler_method=settings["simulation_settings"].sampler_method.lower(),
output_directory=self.shared_basepath,
dry=dry,
)
return energy_analysis
def _execute(
self,
ctx: gufe.Context,
*,
setup,
simulation,
**inputs,
) -> dict[str, Any]:
log_system_probe(logging.INFO, paths=[ctx.scratch])
# Ensure the environment hasn't changed
self._verify_execution_environment(setup.outputs)
# Get the relevant inputs for running the unit
trajectory = simulation.outputs["trajectory"]
checkpoint = simulation.outputs["checkpoint"]
outputs = self.run(
trajectory=trajectory,
checkpoint=checkpoint,
scratch_basepath=ctx.scratch,
shared_basepath=ctx.shared,
)
# We re-include things here to make life easier when gathering results
if self.simtype == "complex":
previous_outputs = {
"standard_state_correction_A": setup.outputs["standard_state_correction_A"],
"standard_state_correction_B": setup.outputs["standard_state_correction_B"],
"restraint_geometry_A": setup.outputs["restraint_geometry_A"],
"restraint_geometry_B": setup.outputs["restraint_geometry_B"],
}
else:
previous_outputs = {
"standard_state_correction": setup.outputs["standard_state_correction"]
}
previous_outputs["subsampled_pdb_structure"] = setup.outputs["subsampled_pdb_structure"]
previous_outputs["selection_indices"] = setup.outputs["selection_indices"]
previous_outputs["trajectory"] = trajectory
previous_outputs["checkpoint"] = checkpoint
return {
"repeat_id": self._inputs["repeat_id"],
"generation": self._inputs["generation"],
"simtype": self.simtype,
**outputs,
**previous_outputs,
}
================================================
FILE: src/openfe/protocols/openmm_septop/equil_septop_method.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""OpenMM Equilibrium SepTop RBFE Protocol --- :mod:`openfe.protocols.openmm_septop.equil_septop_method`
========================================================================================================
This module implements the necessary methodology tooling to run a
Separated Topologies RBFE calculation using OpenMM tools and one of the
following alchemical sampling methods:
* Hamiltonian Replica Exchange
* Self-adjusted mixture sampling
* Independent window sampling
Current limitations
-------------------
* Transformations that involve net charge changes are currently not supported.
The ligands must have the same net charge.
* Only small molecules are allowed to act as alchemical molecules.
Alchemically changing protein or solvent components would induce
perturbations which are too large to be handled by this Protocol.
Acknowledgements
----------------
This Protocol is based on and inspired by the SepTop implementation from
the Mobleylab (https://github.com/MobleyLab/SeparatedTopologies) as well as
femto (https://github.com/Psivant/femto).
"""
from __future__ import annotations
import logging
import uuid
import warnings
from collections import defaultdict
from typing import Any, Iterable, Optional, Union
import gufe
from gufe import (
ChemicalSystem,
ProteinComponent,
ProteinMembraneComponent,
SmallMoleculeComponent,
SolvatedPDBComponent,
SolventComponent,
settings,
)
from gufe.components import Component
from openff.units import unit as offunit
from rdkit import Chem
from openfe.due import Doi, due
from openfe.protocols.openmm_septop.equil_septop_settings import (
AlchemicalSettings,
IntegratorSettings,
LambdaSettings,
MDSimulationSettings,
MultiStateOutputSettings,
MultiStateSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSolvationSettings,
SepTopEquilOutputSettings,
SepTopSettings,
SettingsBaseModel,
)
from ..openmm_utils import settings_validation, system_validation
from ..restraint_utils.settings import (
BoreschRestraintSettings,
DistanceRestraintSettings,
)
from .septop_protocol_results import SepTopProtocolResult
from .septop_units import (
SepTopComplexAnalysisUnit,
SepTopComplexRunUnit,
SepTopComplexSetupUnit,
SepTopSolventAnalysisUnit,
SepTopSolventRunUnit,
SepTopSolventSetupUnit,
)
# Register the citations associated with this module: the Separated
# Topologies method itself, followed by the OpenMMTools and OpenMM toolkits.
due.cite(
    Doi("10.1021/acs.jctc.3c00282"),
    description="Separated Topologies method",
    path="openfe.protocols.openmm_septop.equil_septop_method",
    cite_module=True,
)
due.cite(
    Doi("10.5281/zenodo.596622"),
    description="OpenMMTools",
    path="openfe.protocols.openmm_septop.equil_septop_method",
    cite_module=True,
)
due.cite(
    Doi("10.1371/journal.pcbi.1005659"),
    description="OpenMM",
    path="openfe.protocols.openmm_septop.equil_septop_method",
    cite_module=True,
)
# Module-level logger named after this module
logger = logging.getLogger(__name__)
def _check_alchemical_charge_difference(
    ligandA: SmallMoleculeComponent,
    ligandB: SmallMoleculeComponent,
):
    """
    Ensure that both alchemical ligands carry the same formal charge.

    Nothing is returned; the function only raises when the charges differ.

    Parameters
    ----------
    ligandA : SmallMoleculeComponent
        The alchemical ligand of end state A.
    ligandB : SmallMoleculeComponent
        The alchemical ligand of end state B.

    Raises
    ------
    ValueError
        * If a change in net charge is detected.
    """
    charge_A, charge_B = (
        Chem.rdmolops.GetFormalCharge(ligand.to_rdkit()) for ligand in (ligandA, ligandB)
    )
    difference = charge_A - charge_B

    # Matching charges: nothing to report
    if difference == 0:
        return

    raise ValueError(
        f"A charge difference of {difference} is observed "
        "between the end states. Unfortunately this protocol "
        "currently does not support net charge changes."
    )
class SepTopProtocol(gufe.Protocol):
    """
    SepTop RBFE calculations using OpenMM and OpenMMTools.

    See Also
    --------
    :mod:`openfe.protocols`
    :class:`openfe.protocols.openmm_septop.SepTopSettings`
    :class:`openfe.protocols.openmm_septop.SepTopProtocolResult`
    :class:`openfe.protocols.openmm_septop.SepTopComplexSetupUnit`
    :class:`openfe.protocols.openmm_septop.SepTopComplexRunUnit`
    :class:`openfe.protocols.openmm_septop.SepTopSolventSetupUnit`
    :class:`openfe.protocols.openmm_septop.SepTopSolventRunUnit`
    """

    result_cls = SepTopProtocolResult
    _settings_cls = SepTopSettings
    _settings: SepTopSettings

    @classmethod
    def _default_settings(cls):
        """Build the set of initial settings for creating this Protocol.

        These settings are intended as a suitable starting point for creating
        an instance of this protocol. It is recommended, however, that care is
        taken to inspect and customize these before performing a Protocol.

        Returns
        -------
        Settings
            a set of default settings
        """
        # The solvent schedule has 27 windows, assembled here from three
        # consecutive stages of nine windows each.
        zeros = [0.0] * 9
        ones = [1.0] * 9
        elec_ramp = [0.0, 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0]
        solvent_lambdas = LambdaSettings(
            lambda_elec_A=zeros + elec_ramp + ones,
            lambda_elec_B=ones + elec_ramp[::-1] + zeros,
            lambda_vdw_A=zeros + zeros + [0.15, 0.23, 0.3, 0.4, 0.52, 0.64, 0.76, 0.88, 1.0],
            lambda_vdw_B=[1.0, 0.85, 0.77, 0.7, 0.6, 0.48, 0.36, 0.24, 0.12] + zeros + zeros,
            lambda_restraints_A=[0.0] * 27,
            lambda_restraints_B=[0.0] * 27,
        )

        return SepTopSettings(
            # --- settings shared by both legs ---
            protocol_repeats=3,
            forcefield_settings=settings.OpenMMSystemGeneratorFFSettings(),
            thermo_settings=settings.ThermoSettings(
                temperature=298.15 * offunit.kelvin,
                pressure=1 * offunit.bar,
            ),
            alchemical_settings=AlchemicalSettings(),
            partial_charge_settings=OpenFFPartialChargeSettings(),
            engine_settings=OpenMMEngineSettings(),
            # --- solvent leg ---
            solvent_lambda_settings=solvent_lambdas,
            solvent_solvation_settings=OpenMMSolvationSettings(),
            solvent_integrator_settings=IntegratorSettings(),
            solvent_equil_simulation_settings=MDSimulationSettings(
                equilibration_length_nvt=0.1 * offunit.nanosecond,
                equilibration_length=0.1 * offunit.nanosecond,
                production_length=2.0 * offunit.nanosecond,
            ),
            solvent_equil_output_settings=SepTopEquilOutputSettings(
                equil_nvt_structure=None,
                equil_npt_structure="equil_npt",
                production_trajectory_filename="equil_npt",
                log_output="equil_simulation",
            ),
            solvent_simulation_settings=MultiStateSimulationSettings(
                n_replicas=27,
                minimization_steps=5000,
                equilibration_length=1.0 * offunit.nanosecond,
                production_length=10.0 * offunit.nanosecond,
            ),
            solvent_output_settings=MultiStateOutputSettings(
                output_structure="alchemical_system.pdb",
                output_filename="solvent.nc",
                checkpoint_storage_filename="solvent_checkpoint.nc",
            ),
            solvent_restraint_settings=DistanceRestraintSettings(
                spring_constant=1000.0 * offunit.kilojoule_per_mole / offunit.nanometer**2,
            ),
            # --- complex leg ---
            complex_lambda_settings=LambdaSettings(),
            complex_solvation_settings=OpenMMSolvationSettings(
                solvent_padding=1.0 * offunit.nanometer,
            ),
            complex_integrator_settings=IntegratorSettings(),
            complex_equil_simulation_settings=MDSimulationSettings(
                equilibration_length_nvt=0.1 * offunit.nanosecond,
                equilibration_length=0.1 * offunit.nanosecond,
                production_length=2.0 * offunit.nanosecond,
            ),
            complex_equil_output_settings=SepTopEquilOutputSettings(
                equil_nvt_structure=None,
                equil_npt_structure="equil_npt",
                production_trajectory_filename="equil_production",
                log_output="equil_simulation",
            ),
            complex_simulation_settings=MultiStateSimulationSettings(
                n_replicas=19,
                equilibration_length=1.0 * offunit.nanosecond,
                production_length=10.0 * offunit.nanosecond,
            ),
            complex_output_settings=MultiStateOutputSettings(
                output_structure="alchemical_system.pdb",
                output_filename="complex.nc",
                checkpoint_storage_filename="complex_checkpoint.nc",
            ),
            complex_restraint_settings=BoreschRestraintSettings(),
        )

    @classmethod
    def _adaptive_settings(
        cls,
        stateA: ChemicalSystem,
        stateB: ChemicalSystem,
        initial_settings: None | SepTopSettings = None,
    ) -> SepTopSettings:
        """
        Get the recommended OpenFE settings for this Protocol based on the
        input states involved in the transformation.

        These are intended as a suitable starting point, which can be further
        customized before creating a Protocol.

        Parameters
        ----------
        stateA : ChemicalSystem
            The initial state of the transformation.
        stateB : ChemicalSystem
            The final state of the transformation.
        initial_settings : None | SepTopSettings, optional
            Initial settings to adapt. If None, default settings are used.

        Returns
        -------
        SepTopSettings
            The recommended settings for this protocol based on the input states.
        """
        # Start from a deep copy so the caller's settings are left untouched
        protocol_settings = (
            cls.default_settings()
            if initial_settings is None
            else initial_settings.model_copy(deep=True)
        )

        # A membrane protein in state A requires the membrane barostat
        if stateA.contains(ProteinMembraneComponent):
            protocol_settings.complex_integrator_settings.barostat = "MonteCarloMembraneBarostat"

        return protocol_settings

    @staticmethod
    def _validate_endstates(
        stateA: ChemicalSystem,
        stateB: ChemicalSystem,
    ) -> None:
        """
        Check that the end states describe a supported transformation.

        A complex relative transformation is defined (in terms of gufe components)
        as starting from one or more ligands and a protein in solvent and
        ending up in a state with one ligand that is different.

        Parameters
        ----------
        stateA : ChemicalSystem
            The chemical system of end state A
        stateB : ChemicalSystem
            The chemical system of end state B

        Raises
        ------
        ValueError
            If there is no ProteinComponent or no SolventComponent
            in either stateA or stateB.
            If there are no or more than one alchemical components in state A.
            If there are no or more than one alchemical components in state B.
            If there are any alchemical components that are not SmallMoleculeComponents.
            If a change in net charge between the alchemical components is detected.
        """
        labelled_states = (("stateA", stateA), ("stateB", stateB))

        # check that there is a protein component
        for label, state in labelled_states:
            if not state.contains(ProteinComponent):
                raise ValueError(f"No ProteinComponent found in {label}")

        # check that there is only one protein component
        for _, state in labelled_states:
            system_validation.validate_protein(state)

        # check that there is a SolventComponent
        for label, state in labelled_states:
            if not state.contains(SolventComponent):
                raise ValueError(f"No SolventComponent found in {label}")

        # Check the difference between the endstates
        diff = stateA.component_diff(stateB)

        for idx, label in enumerate(("stateA", "stateB")):
            unique_components = diff[idx]

            # Error if there isn't exactly one alchemical component
            if len(unique_components) != 1:
                raise ValueError(
                    "Only one alchemical species is supported. "
                    f"Number of unique components found in {label}: {len(unique_components)}."
                )

            # Error if the component isn't an SMC
            alchemical_component = unique_components[0]
            if not isinstance(alchemical_component, SmallMoleculeComponent):
                raise ValueError(
                    "Only transforming SmallMoleculeComponents are supported "
                    f"by this Protocol. Found a {type(alchemical_component)}."
                )

        # Raise an error if there is a change in net charge
        _check_alchemical_charge_difference(diff[0][0], diff[1][0])

    @staticmethod
    def _validate_lambda_schedule(
        lambda_settings: LambdaSettings,
        simulation_settings: MultiStateSimulationSettings,
    ) -> None:
        """
        Checks that the lambda schedule is set up correctly.

        Parameters
        ----------
        lambda_settings : LambdaSettings
            the lambda schedule Settings
        simulation_settings : MultiStateSimulationSettings
            the settings for either the complex or solvent phase

        Raises
        ------
        ValueError
            If the number of lambda windows differs between the elec, vdw
            and restraints components.
            If the number of replicas does not match the number of lambda windows.
            If a window has charges switched on while the LJ interactions
            are fully switched off.

        TODO
        ----
        Add a warning if all the lambda restraints are zero? Issue #1945.
        """
        elec_A = lambda_settings.lambda_elec_A
        elec_B = lambda_settings.lambda_elec_B
        vdw_A = lambda_settings.lambda_vdw_A
        vdw_B = lambda_settings.lambda_vdw_B
        restraints_A = lambda_settings.lambda_restraints_A
        restraints_B = lambda_settings.lambda_restraints_B
        n_replicas = simulation_settings.n_replicas

        # Ensure that all lambda components have equal amount of windows
        window_counts = [
            len(schedule)
            for schedule in (vdw_A, vdw_B, elec_A, elec_B, restraints_A, restraints_B)
        ]
        if len(set(window_counts)) != 1:
            raise ValueError(
                "Components elec, vdw, and restraints must have equal amount"
                f" of lambda windows. Got {len(elec_A)} and "
                f"{len(elec_B)} elec lambda windows, "
                f"{len(vdw_A)} and {len(vdw_B)} vdw "
                f"lambda windows, and {len(restraints_A)} and "
                f"{len(restraints_B)} restraints lambda windows."
            )

        # Ensure that number of overall lambda windows matches number of lambda
        # windows for individual components
        n_windows = len(vdw_B)
        if n_replicas != n_windows:
            raise ValueError(
                f"Number of replicas {n_replicas} does not equal the"
                f" number of lambda windows {n_windows}"
            )

        # Check if there are lambda windows with naked charges
        per_ligand_schedules = (("A", elec_A, vdw_A), ("B", elec_B, vdw_B))
        for state, elec, vdw in per_ligand_schedules:
            for idx, (e, v) in enumerate(zip(elec, vdw)):
                if v == 1 and e < 1:
                    raise ValueError(
                        "There are states along this lambda schedule where "
                        "there are atoms with charges but no LJ interactions: "
                        f"State {state}: lambda {idx}: elec {e} vdW {v}"
                    )

    def _validate(
        self,
        stateA: ChemicalSystem,
        stateB: ChemicalSystem,
        mapping: gufe.ComponentMapping | list[gufe.ComponentMapping] | None,
        extends: gufe.ProtocolDAGResult | None = None,
    ) -> None:
        """Validate the inputs and the settings of this Protocol.

        Parameters
        ----------
        stateA : ChemicalSystem
            The chemical system of end state A
        stateB : ChemicalSystem
            The chemical system of end state B
        mapping : gufe.ComponentMapping | list[gufe.ComponentMapping] | None
            Not used by this Protocol; a warning is emitted if one is passed.
        extends : gufe.ProtocolDAGResult | None
            Extending a previous result is not supported.

        Raises
        ------
        ValueError
            If ``extends`` is passed.
        """
        # Check we're not trying to extend
        if extends:
            # This technically should be NotImplementedError
            # but gufe.Protocol.validate calls `_validate` wrapped
            # around a try/except for that error type
            raise ValueError("Can't extend simulations yet")

        # Check the mapping
        if mapping is not None:
            warnings.warn("A mapping was passed but is not used by this Protocol")

        # Validate end states
        for state in (stateA, stateB):
            system_validation.validate_chemical_system(state)
        self._validate_endstates(stateA, stateB)

        protocol_settings = self.settings

        # Validate the lambda schedule of the solvent leg, then the complex leg
        for lambda_settings, simulation_settings in (
            (
                protocol_settings.solvent_lambda_settings,
                protocol_settings.solvent_simulation_settings,
            ),
            (
                protocol_settings.complex_lambda_settings,
                protocol_settings.complex_simulation_settings,
            ),
        ):
            self._validate_lambda_schedule(lambda_settings, simulation_settings)

        # Check nonbonded and solvent compatibility
        nonbonded_method = protocol_settings.forcefield_settings.nonbonded_method

        # Validate solvent component
        system_validation.validate_solvent(stateA, nonbonded_method)

        # Validate solvation settings
        for solvation_settings in (
            protocol_settings.solvent_solvation_settings,
            protocol_settings.complex_solvation_settings,
        ):
            settings_validation.validate_openmm_solvation_settings(solvation_settings)

        # Validate the barostat used in combination with the protein component
        system_validation.validate_barostat(
            stateA, protocol_settings.complex_integrator_settings.barostat
        )

    def _create(
        self,
        stateA: ChemicalSystem,
        stateB: ChemicalSystem,
        mapping: gufe.ComponentMapping | list[gufe.ComponentMapping] | None = None,
        extends: gufe.ProtocolDAGResult | None = None,
    ) -> list[gufe.ProtocolUnit]:
        """Create the setup, simulation and analysis units of every repeat.

        Returns
        -------
        list[gufe.ProtocolUnit]
            All the units of the solvent leg, followed by all the units of
            the complex leg.
        """
        self.validate(stateA=stateA, stateB=stateB, mapping=mapping, extends=extends)

        # Get the alchemical components
        alchem_comps = system_validation.get_alchemical_components(stateA, stateB)
        alchname_A = alchem_comps["stateA"][0].name
        alchname_B = alchem_comps["stateB"][0].name

        unit_classes: dict[str, dict[str, type[gufe.ProtocolUnit]]] = {
            "solvent": {
                "setup": SepTopSolventSetupUnit,
                "simulation": SepTopSolventRunUnit,
                "analysis": SepTopSolventAnalysisUnit,
            },
            "complex": {
                "setup": SepTopComplexSetupUnit,
                "simulation": SepTopComplexRunUnit,
                "analysis": SepTopComplexAnalysisUnit,
            },
        }

        # Units are collected per leg so the solvent ones can be listed first
        protocol_units: dict[str, list[gufe.ProtocolUnit]] = {"solvent": [], "complex": []}

        for i in range(self.settings.protocol_repeats):
            # Both legs of a repeat share the same identifier
            repeat_id = int(uuid.uuid4())

            for phase, phase_units in protocol_units.items():
                phase_classes = unit_classes[phase]
                name_suffix = (
                    f", transformation {alchname_A} to "
                    f"{alchname_B}, {phase} leg: repeat {i} generation 0"
                )

                setup = phase_classes["setup"](
                    protocol=self,
                    stateA=stateA,
                    stateB=stateB,
                    alchemical_components=alchem_comps,
                    generation=0,
                    repeat_id=repeat_id,
                    name=f"SepTop RBFE Setup{name_suffix}",
                )

                simulation = phase_classes["simulation"](
                    protocol=self,
                    stateA=stateA,
                    stateB=stateB,
                    alchemical_components=alchem_comps,
                    setup=setup,
                    generation=0,
                    repeat_id=repeat_id,
                    name=f"SepTop RBFE Run{name_suffix}",
                )

                analysis = phase_classes["analysis"](
                    protocol=self,
                    setup=setup,
                    simulation=simulation,
                    generation=0,
                    repeat_id=repeat_id,
                    name=f"SepTop RBFE Analysis{name_suffix}",
                )

                phase_units.extend([setup, simulation, analysis])

        return protocol_units["solvent"] + protocol_units["complex"]

    def _gather(
        self, protocol_dag_results: Iterable[gufe.ProtocolDAGResult]
    ) -> dict[str, dict[str, Any]]:
        """Group the successful analysis results by leg and by repeat.

        Returns
        -------
        dict[str, dict[str, Any]]
            Keyed ``solvent`` and ``complex``; each entry maps the repeat id
            (as a string) to its unit results sorted by generation.
        """
        # result units will have a repeat_id and generation
        # first group according to repeat_id
        grouped: dict[str, defaultdict] = {
            "solvent": defaultdict(list),
            "complex": defaultdict(list),
        }

        for dag_result in protocol_dag_results:
            pur: gufe.ProtocolUnitResult
            for pur in dag_result.protocol_unit_results:
                # Only successful analysis units carry the results
                if "Analysis" not in pur.name or not pur.ok():
                    continue
                leg = "solvent" if pur.outputs["simtype"] == "solvent" else "complex"
                grouped[leg][pur.outputs["repeat_id"]].append(pur)

        # then order the results of each repeat by generation
        repeats: dict[str, dict[str, list[gufe.ProtocolUnitResult]]] = {
            leg: {
                str(repeat_id): sorted(results, key=lambda x: x.outputs["generation"])
                for repeat_id, results in by_repeat.items()
            }
            for leg, by_repeat in grouped.items()
        }
        return repeats
================================================
FILE: src/openfe/protocols/openmm_septop/equil_septop_settings.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""Settings class for equilibrium SepTop Protocols using OpenMM + OpenMMTools
This module implements the settings necessary to run SepTop RBFE
calculations using OpenMM.
See Also
--------
openfe.protocols.openmm_septop.SepTopProtocol
"""
from typing import Optional
import numpy as np
from gufe.settings import (
OpenMMSystemGeneratorFFSettings,
SettingsBaseModel,
ThermoSettings,
)
from gufe.settings.typing import PicosecondQuantity
from openff.units import unit as offunit
from pydantic import field_validator
from openfe.protocols.openmm_afe.equil_afe_settings import AlchemicalSettings
from openfe.protocols.openmm_utils.omm_settings import (
IntegratorSettings,
MDOutputSettings,
MDSimulationSettings,
MultiStateOutputSettings,
MultiStateSimulationSettings,
OpenFFPartialChargeSettings,
OpenMMEngineSettings,
OpenMMSolvationSettings,
)
from openfe.protocols.restraint_utils.settings import BaseRestraintSettings
class LambdaSettings(SettingsBaseModel):
    """Lambda schedule settings.

    Defines lists of floats to control various aspects of the alchemical
    transformation.

    Notes
    -----
    * Every lambda value must lie between 0 and 1.
    * The schedules of ligand A must be monotonically increasing and the
      schedules of ligand B monotonically decreasing.
    * ``lambda_elec``, ``lambda_vdw``, and ``lambda_restraints`` must all be of
      the same length, defining all the windows of the transformation.
    """

    # fmt: off
    lambda_elec_A: list[float] = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.25, 0.5, 0.75, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    ]
    # fmt: on
    """
    List of floats of the lambda values for the electrostatics of ligand A.
    Zero means fully interacting and 1 means fully decoupled.
    Length of this list needs to match length of lambda_vdw and lambda_restraints.
    """

    # fmt: off
    lambda_elec_B: list[float] = [
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 0.75, 0.5, 0.25, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    ]
    # fmt: on
    """
    List of floats of the lambda values for the electrostatics of ligand B.
    Zero means fully interacting and 1 means fully decoupled.
    Length of this list needs to match length of lambda_vdw and
    lambda_restraints.
    """

    # fmt: off
    lambda_vdw_A: list[float] = [
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0,
        0.14285714285714285, 0.2857142857142857, 0.42857142857142855, 0.5714285714285714, 0.7142857142857142, 0.8571428571428571, 1.0,
    ]
    # fmt: on
    """
    List of floats of lambda values for the van der Waals of ligand A.
    Zero means fully interacting and 1 means fully decoupled.
    Length of this list needs to match length of lambda_elec and
    lambda_restraints.
    """

    # fmt: off
    lambda_vdw_B: list[float] = [
        1.0, 0.8571428571428572, 0.7142857142857143, 0.5714285714285714, 0.4285714285714286, 0.2857142857142858, 0.1428571428571429,
        0.0, 0.0, 0.0, 0.0, 0.0,
        0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
    ]
    # fmt: on
    """
    List of floats of lambda values for the van der Waals of ligand B.
    Zero means fully interacting and 1 means fully decoupled.
    Length of this list needs to match length of lambda_elec and lambda_restraints.
    """

    # fmt: off
    lambda_restraints_A: list[float] = [
        0.0, 0.05, 0.1, 0.3, 0.5, 0.75, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
    ]
    # fmt: on
    """
    List of floats of lambda values for the restraints of ligand A.
    Zero means no restraints are applied and 1 means restraints are fully applied.
    Length of this list needs to match length of lambda_vdw and lambda_elec.
    """

    # fmt: off
    lambda_restraints_B: list[float] = [
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 0.75, 0.5, 0.3, 0.1, 0.05, 0.0,
    ]
    # fmt: on
    """
    List of floats of lambda values for the restraints of ligand B.
    Zero means no restraints are applied and 1 means restraints are fully applied.
    Length of this list needs to match length of lambda_vdw and lambda_elec.
    """

    @field_validator(
        "lambda_elec_A",
        "lambda_elec_B",
        "lambda_vdw_A",
        "lambda_vdw_B",
        "lambda_restraints_A",
        "lambda_restraints_B",
    )
    def must_be_between_0_and_1(cls, v):
        # Report the first window falling outside of the [0, 1] interval
        offending = next((window for window in v if not 0 <= window <= 1), None)
        if offending is not None:
            raise ValueError(
                f"Lambda windows must be between 0 and 1, got a window with value {offending}."
            )
        return v

    @field_validator(
        "lambda_elec_A",
        "lambda_vdw_A",
        "lambda_restraints_A",
    )
    def must_be_monotonically_increasing_A(cls, v):
        # Consecutive windows of ligand A may never step downwards
        steps = np.diff(v)
        if np.all(steps >= 0):
            return v
        raise ValueError(
            "The lambda schedule for ligand A is not monotonically"
            f" increasing, got schedule {v}."
        )

    @field_validator(
        "lambda_elec_B",
        "lambda_vdw_B",
        "lambda_restraints_B",
    )
    def must_be_monotonically_decreasing_B(cls, v):
        # Consecutive windows of ligand B may never step upwards
        steps = np.diff(v)
        if np.all(steps <= 0):
            return v
        raise ValueError(
            "The lambda schedule for ligand B is not monotonically"
            f" decreasing, got schedule {v}."
        )
class SepTopEquilOutputSettings(MDOutputSettings):
    # Output settings of the pre-alchemical equilibration simulations. All the
    # basenames below are completed by the protocol with a suffix identifying
    # the end state (state A or state B).

    # reporter settings
    output_indices: str = "all"
    """
    Selection string for which part of the system to write coordinates for.
    The SepTop protocol enforces "all" since the full system output is
    required in the complex leg.
    Default "all".
    """

    production_trajectory_filename: Optional[str] = "simulation"
    """
    Basename for the path to the storage file for analysis. The protocol will
    append a '_stateA.xtc' and a '_stateB.xtc' for the output files of the
    respective endstates. Default 'simulation'.
    """

    trajectory_write_interval: PicosecondQuantity = 20.0 * offunit.picosecond
    """
    Frequency to write the xtc file. Default 20 * offunit.picosecond.
    """

    preminimized_structure: Optional[str] = "system"
    """
    Basename for the path to the pdb file of the full pre-minimized systems.
    The protocol will append a '_stateA.pdb' and a '_stateB.pdb' for the output
    files of the respective endstates. Default 'system'.
    """

    minimized_structure: Optional[str] = "minimized"
    """
    Basename for the path to the pdb file of the systems after minimization.
    The protocol will append a '_stateA.pdb' and a '_stateB.pdb' for the output
    files of the respective endstates. Default 'minimized'.
    """

    equil_nvt_structure: Optional[str] = "equil_nvt"
    """
    Basename for the path to the pdb file of the systems after NVT equilibration.
    The protocol will append a '_stateA' and a '_stateB' for the output files
    of the respective endstates. Default 'equil_nvt'.
    """

    equil_npt_structure: Optional[str] = "equil_npt"
    """
    Basename for the path to the pdb file of the systems after NPT equilibration.
    The protocol will append a '_stateA.pdb' and a '_stateB.pdb' for the output
    files of the respective endstates. Default 'equil_npt'.
    """

    # NOTE(review): the '.pdb' suffix documented below looks copied from the
    # structure fields - confirm the actual suffix used for the log files.
    log_output: Optional[str] = "simulation"
    """
    Basename for the filename for writing the log of the MD simulation,
    including timesteps, energies, density, etc.
    The protocol will append a '_stateA.pdb' and a '_stateB.pdb' for the output
    files of the respective endstates. Default 'simulation'.
    """

    @field_validator("output_indices")
    def must_be_all(cls, v):
        # Only the full system selection is accepted
        if v == "all":
            return v
        raise ValueError(f"Equilibration simulations need to output the full system, got {v}.")
class SepTopSettings(SettingsBaseModel):
    """
    Configuration object for ``SepTopProtocol``.

    See Also
    --------
    openfe.protocols.openmm_septop.SepTopProtocol
    """

    protocol_repeats: int
    """
    The number of completely independent repeats of the entire sampling
    process. The mean of the repeats defines the final estimate of FE
    difference, while the variance between repeats is used as the uncertainty.
    Must be a positive value.
    """

    # Inherited things
    forcefield_settings: OpenMMSystemGeneratorFFSettings
    """Parameters to set up the force field with OpenMM Force Fields"""

    thermo_settings: ThermoSettings
    """Settings for thermodynamic parameters"""

    solvent_solvation_settings: OpenMMSolvationSettings
    """Settings for solvating the solvent system."""

    complex_solvation_settings: OpenMMSolvationSettings
    """Settings for solvating the complex system."""

    # Alchemical settings
    alchemical_settings: AlchemicalSettings
    """
    Alchemical protocol settings.
    """

    solvent_lambda_settings: LambdaSettings
    """
    Settings for controlling the lambda schedule for the different components
    (vdw, elec, restraints) in the solvent.
    """

    complex_lambda_settings: LambdaSettings
    """
    Settings for controlling the lambda schedule for the different components
    (vdw, elec, restraints) in the complex.
    """

    # MD Engine things
    engine_settings: OpenMMEngineSettings
    """
    Settings specific to the OpenMM engine, such as the compute platform.
    """

    # Sampling State defining things
    solvent_integrator_settings: IntegratorSettings
    """
    Settings for controlling the integrator, such as the timestep and
    barostat settings in the solvent.
    """

    complex_integrator_settings: IntegratorSettings
    """
    Settings for controlling the integrator, such as the timestep and
    barostat settings in the complex.
    """

    # Simulation run settings
    complex_equil_simulation_settings: MDSimulationSettings
    """
    Pre-alchemical complex simulation control settings.
    """

    complex_simulation_settings: MultiStateSimulationSettings
    """
    Simulation control settings, including simulation lengths
    for the complex transformation.
    """

    solvent_equil_simulation_settings: MDSimulationSettings
    """
    Pre-alchemical solvent simulation control settings.
    """

    solvent_simulation_settings: MultiStateSimulationSettings
    """
    Simulation control settings, including simulation lengths
    for the solvent transformation.
    """

    # Simulation output settings
    complex_equil_output_settings: SepTopEquilOutputSettings
    """
    Simulation output settings for the complex non-alchemical equilibration.
    """

    complex_output_settings: MultiStateOutputSettings
    """
    Simulation output settings for the complex transformation.
    """

    solvent_equil_output_settings: SepTopEquilOutputSettings
    """
    Simulation output settings for the solvent non-alchemical equilibration.
    """

    solvent_output_settings: MultiStateOutputSettings
    """
    Simulation output settings for the solvent transformation.
    """

    # Partial charge settings
    partial_charge_settings: OpenFFPartialChargeSettings
    """
    Settings for controlling how to assign partial charges,
    including the partial charge assignment method, and the
    number of conformers used to generate the partial charges.
    """

    # Restraint settings
    solvent_restraint_settings: BaseRestraintSettings
    """
    Settings for the harmonic restraint in the solvent
    """

    complex_restraint_settings: BaseRestraintSettings
    """
    Settings for the Boresch restraints in the complex
    """

    @field_validator("protocol_repeats")
    def must_be_positive(cls, v):
        # At least one repeat is needed to produce an estimate
        if v > 0:
            return v
        raise ValueError(f"protocol_repeats must be a positive value, got {v}.")
================================================
FILE: src/openfe/protocols/openmm_septop/septop_protocol_results.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""Result class for the SepTop Protocol :class:`openfe.protocols.openmm_septop.SepTopProtocolResult`
====================================================================================================
This module implements a :class:`gufe.ProtocolResult` class to contain the results of
a :class:`openfe.protocols.openmm_septop.SepTopProtocol` free energy simulation.
"""
from __future__ import annotations
import itertools
import logging
import pathlib
import warnings
from typing import Any, Optional, Union
import gufe
import numpy as np
import numpy.typing as npt
from openff.units import Quantity
from openff.units import unit as offunit
from openmmtools import multistate
from openfe.protocols.restraint_utils.geometry.boresch import BoreschRestraintGeometry
logger = logging.getLogger(__name__)
class SepTopProtocolResult(gufe.ProtocolResult):
"""Dict-like container for the output of a SepTopProtocol"""
def __init__(self, **data):
    """Store the gathered results and reject ones that would need stitching.

    Parameters
    ----------
    **data
        Forwarded unchanged to the parent ``gufe.ProtocolResult``
        constructor. Afterwards ``self.data["solvent"]`` and
        ``self.data["complex"]`` are read as mappings whose values are
        lists of unit results.

    Raises
    ------
    NotImplementedError
        If any entry of either leg holds more than two unit results.
    """
    super().__init__(**data)
    # TODO: Detect when we have extensions and stitch these together?
    all_unit_lists = itertools.chain(
        self.data["solvent"].values(), self.data["complex"].values()
    )
    for pur_list in all_unit_lists:
        if len(pur_list) > 2:
            raise NotImplementedError("Can't stitch together results yet")
def get_individual_estimates(
self,
) -> dict[str, list[tuple[Quantity, Quantity]]]:
"""
Get the individual estimate of the free energies.
Returns
-------
dGs : dict[str, list[tuple[unit.Quantity, unit.Quantity]]]
A dictionary, keyed ``solvent`` and ``complex`` for each leg
of the thermodynamic cycle, with lists of tuples containing
the individual free energy estimates and associated MBAR
uncertainties for each repeat of that simulation type.
"""
complex_dGs = []
complex_correction_dGs_A = []
complex_correction_dGs_B = []
solv_dGs = []
solv_correction_dGs: list[tuple[Any, Any]] = []
for pus in self.data["complex"].values():
complex_dGs.append(
(pus[0].outputs["unit_estimate"], pus[0].outputs["unit_estimate_error"])
)
complex_correction_dGs_A.append(
(
pus[0].outputs["standard_state_correction_A"],
0 * offunit.kilocalorie_per_mole, # correction has no error
)
)
complex_correction_dGs_B.append(
(
pus[0].outputs["standard_state_correction_B"],
0 * offunit.kilocalorie_per_mole, # correction has no error
)
)
for pus in self.data["solvent"].values():
solv_dGs.append(
(pus[0].outputs["unit_estimate"], pus[0].outputs["unit_estimate_error"])
)
solv_correction_dGs.append(
(
pus[0].outputs["standard_state_correction"],
0 * offunit.kilocalorie_per_mole, # correction has no error
)
)
return {
"solvent": solv_dGs,
"complex": complex_dGs,
"standard_state_correction_complex_A": complex_correction_dGs_A,
"standard_state_correction_complex_B": complex_correction_dGs_B,
"standard_state_correction_solvent": solv_correction_dGs,
}
@staticmethod
def _add_complex_standard_state_corr(
complex_dG: list[tuple[Quantity, Quantity]],
standard_state_corrA_dG: list[tuple[Quantity, Quantity]],
standard_state_corrB_dG: list[tuple[Quantity, Quantity]],
) -> list[tuple[Quantity, Quantity]]:
"""
Helper method to combine the
complex & standard state corrections legs.
Parameters
----------
complex_dG : list[tuple[openff.units.Quantity, openff.units.Quantity]]
The individual estimates of the complex leg,
where the first entry of each tuple is the dG estimate
and the second entry is the MBAR error.
standard_state_corrA_dG : list[tuple[Quantity, Quantity]]
The individual standard state corrections of state A
for each corresponding complex leg. The first entry is the
correction, the second is an empty error value of 0.
standard_state_corrB_dG : list[tuple[Quantity, Quantity]]
The individual standard state corrections of state B
for each corresponding complex leg. The first entry is the
correction, the second is an empty error value of 0.
Returns
-------
combined_dG : list[tuple[openff.units.Quantity,openff.units. Quantity]]
A list of dG estimates & MBAR errors for the combined
complex & standard state correction of each repeat.
Notes
-----
We assume that both list of items are in the right order.
"""
combined_dG: list[tuple[Quantity, Quantity]] = []
for comp, corrA, corrB in zip(complex_dG, standard_state_corrA_dG, standard_state_corrB_dG):
# No need to convert unit types, since pint takes care of that
# except that mypy hates it because pint isn't typed properly...
# No need to add errors since there's just the one
combined_dG.append((comp[0] + corrA[0] + corrB[0], comp[1])) # type: ignore[operator]
return combined_dG
@staticmethod
def _add_solvent_standard_state_corr(
solvent_dG: list[tuple[Quantity, Quantity]],
standard_state_corr_dG: list[tuple[Quantity, Quantity]],
) -> list[tuple[Quantity, Quantity]]:
"""
Helper method to combine the
solvent & standard state corrections legs.
Parameters
----------
solvent_dG : list[tuple[openff.units.Quantity, openff.units.Quantity]]
The individual estimates of the solvent leg,
where the first entry of each tuple is the dG estimate
and the second entry is the MBAR error.
standard_state_corrA_dG : list[tuple[Quantity, Quantity]]
The individual solvent standard state corrections.
The first entry is the correction, the second is an empty error
value of 0.
Returns
-------
combined_dG : list[tuple[openff.units.Quantity,openff.units. Quantity]]
A list of dG estimates & MBAR errors for the combined
solvent & standard state correction of each repeat.
Notes
-----
We assume that both list of items are in the right order.
"""
combined_dG: list[tuple[Quantity, Quantity]] = []
for comp, corr in zip(solvent_dG, standard_state_corr_dG):
# No need to convert unit types, since pint takes care of that
# except that mypy hates it because pint isn't typed properly...
# No need to add errors since there's just the one
combined_dG.append((comp[0] + corr[0], comp[1])) # type: ignore[operator]
return combined_dG
def get_estimate(self) -> Quantity:
    """Get the difference in binding free energy estimate for this calculation.

    Each leg's per-repeat estimates are first combined with their
    standard state corrections and then averaged over the repeats.

    Returns
    -------
    ddG : openff.units.Quantity
        The averaged corrected complex leg minus the averaged corrected
        solvent leg. This is a Quantity defined with units.
    """

    def _mean_over_repeats(estimates):
        # the first estimate fixes the unit everything is expressed in
        ref_unit = estimates[0][0].u
        magnitudes = [entry[0].to(ref_unit).m for entry in estimates]
        return np.average(magnitudes) * ref_unit

    per_repeat = self.get_individual_estimates()

    solvent_with_corr = self._add_solvent_standard_state_corr(
        per_repeat["solvent"],
        per_repeat["standard_state_correction_solvent"],
    )
    solv_ddG = _mean_over_repeats(solvent_with_corr)

    complex_with_corr = self._add_complex_standard_state_corr(
        per_repeat["complex"],
        per_repeat["standard_state_correction_complex_A"],
        per_repeat["standard_state_correction_complex_B"],
    )
    complex_ddG = _mean_over_repeats(complex_with_corr)

    return complex_ddG - solv_ddG
def get_uncertainty(self) -> Quantity:
    """Get the relative free energy error for this calculation.

    Each leg's per-repeat estimates are first combined with their
    standard state corrections; the spread over the repeats is then
    taken per leg with ``numpy.std``.

    Returns
    -------
    err : unit.Quantity
        The solvent and complex standard deviations added in quadrature.
        This is a Quantity defined with units.
    """

    def _spread_over_repeats(estimates):
        # the first estimate fixes the unit everything is expressed in
        ref_unit = estimates[0][0].u
        magnitudes = [entry[0].to(ref_unit).m for entry in estimates]
        return np.std(magnitudes) * ref_unit

    per_repeat = self.get_individual_estimates()

    solvent_with_corr = self._add_solvent_standard_state_corr(
        per_repeat["solvent"],
        per_repeat["standard_state_correction_solvent"],
    )
    solv_err = _spread_over_repeats(solvent_with_corr)

    complex_with_corr = self._add_complex_standard_state_corr(
        per_repeat["complex"],
        per_repeat["standard_state_correction_complex_A"],
        per_repeat["standard_state_correction_complex_B"],
    )
    complex_err = _spread_over_repeats(complex_with_corr)

    # return the combined error
    return np.sqrt(solv_err**2 + complex_err**2)
def get_forward_and_reverse_energy_analysis(
self,
) -> dict[str, list[Optional[dict[str, Union[npt.NDArray, Quantity]]]]]:
"""
Get the reverse and forward analysis of the free energies.
Returns
-------
forward_reverse : dict[str, list[Optional[dict[str, Union[npt.NDArray, unit.Quantity]]]]]
A dictionary, keyed `complex` and `solvent` for each leg of the
thermodynamic cycle which each contain a list of dictionaries
containing the forward and reverse analysis of each repeat
of that simulation type.
The forward and reverse analysis dictionaries contain:
- `fractions`: npt.NDArray
The fractions of data used for the estimates
- `forward_DDGs`, `reverse_DDGs`: unit.Quantity
The forward and reverse estimates for each fraction of data
- `forward_dDDGs`, `reverse_dDDGs`: unit.Quantity
The forward and reverse estimate uncertainty for each
fraction of data.
If one of the cycle leg list entries is ``None``, this indicates
that the analysis could not be carried out for that repeat. This
is most likely caused by MBAR convergence issues when attempting to
calculate free energies from too few samples.
Raises
------
UserWarning
* If any of the forward and reverse dictionaries are ``None`` in a
given thermodynamic cycle leg.
"""
forward_reverse: dict[str, list[Optional[dict[str, Union[npt.NDArray, Quantity]]]]] = {}
for key in ["complex", "solvent"]:
forward_reverse[key] = [
pus[0].outputs["forward_and_reverse_energies"] for pus in self.data[key].values()
]
if None in forward_reverse[key]:
wmsg = (
"One or more ``None`` entries were found in the forward "
f"and reverse dictionaries of the repeats of the {key} "
"calculations. This is likely caused by an MBAR convergence "
"failure caused by too few independent samples when "
"calculating the free energies of the 10% timeseries slice."
)
warnings.warn(wmsg)
return forward_reverse
def get_overlap_matrices(self) -> dict[str, list[dict[str, npt.NDArray]]]:
"""
Get a the MBAR overlap estimates for all legs of the simulation.
Returns
-------
overlap_stats : dict[str, list[dict[str, npt.NDArray]]]
A dictionary with keys `complex` and `solvent` for each
leg of the thermodynamic cycle, which each containing a
list of dictionaries with the MBAR overlap estimates of
each repeat of that simulation type.
The underlying MBAR dictionaries contain the following keys:
* ``scalar``: One minus the largest nontrivial eigenvalue
* ``eigenvalues``: The sorted (descending) eigenvalues of the
overlap matrix
* ``matrix``: Estimated overlap matrix of observing a sample from
state i in state j
"""
# Loop through and get the repeats and get the matrices
overlap_stats: dict[str, list[dict[str, npt.NDArray]]] = {}
for key in ["complex", "solvent"]:
overlap_stats[key] = [
pus[0].outputs["unit_mbar_overlap"] for pus in self.data[key].values()
]
return overlap_stats
def get_replica_transition_statistics(
self,
) -> dict[str, list[dict[str, npt.NDArray]]]:
"""
Get the replica exchange transition statistics for all
legs of the simulation.
Note
----
This is currently only available in cases where a replica exchange
simulation was run.
Returns
-------
repex_stats : dict[str, list[dict[str, npt.NDArray]]]
A dictionary with keys `complex` and `solvent` for each
leg of the thermodynamic cycle, which each containing
a list of dictionaries containing the replica transition
statistics for each repeat of that simulation type.
The replica transition statistics dictionaries contain the following:
* ``eigenvalues``: The sorted (descending) eigenvalues of the
lambda state transition matrix
* ``matrix``: The transition matrix estimate of a replica switching
from state i to state j.
"""
repex_stats: dict[str, list[dict[str, npt.NDArray]]] = {}
try:
for key in ["complex", "solvent"]:
repex_stats[key] = [
pus[0].outputs["replica_exchange_statistics"] for pus in self.data[key].values()
]
except KeyError:
errmsg = "Replica exchange statistics were not found, did you run a repex calculation?"
raise ValueError(errmsg)
return repex_stats
def get_replica_states(self) -> dict[str, list[npt.NDArray]]:
"""
Get the timeseries of replica states for all simulation legs.
Returns
-------
replica_states : dict[str, list[npt.NDArray]]
Dictionary keyed `complex` and `solvent` for each leg of
the thermodynamic cycle, with lists of replica states
timeseries for each repeat of that simulation type.
"""
replica_states: dict[str, list[npt.NDArray]] = {"complex": [], "solvent": []}
def is_file(filename: str):
p = pathlib.Path(filename)
if not p.exists():
errmsg = f"File could not be found {p}"
raise ValueError(errmsg)
return p
def get_replica_state(nc, chk):
nc = is_file(nc)
dir_path = nc.parents[0]
chk = is_file(dir_path / chk).name
reporter = multistate.MultiStateReporter(
storage=nc, checkpoint_storage=chk, open_mode="r"
)
retval = np.asarray(reporter.read_replica_thermodynamic_states())
reporter.close()
return retval
for key in ["complex", "solvent"]:
for pus in self.data[key].values():
states = get_replica_state(
pus[0].outputs["trajectory"],
pus[0].outputs["checkpoint"],
)
replica_states[key].append(states)
return replica_states
def equilibration_iterations(self) -> dict[str, list[float]]:
"""
Get the number of equilibration iterations for each simulation.
Returns
-------
equilibration_lengths : dict[str, list[float]]
Dictionary keyed `complex` and `solvent` for each leg
of the thermodynamic cycle, with lists containing the
number of equilibration iterations for each repeat
of that simulation type.
"""
equilibration_lengths: dict[str, list[float]] = {}
for key in ["complex", "solvent"]:
equilibration_lengths[key] = [
pus[0].outputs["equilibration_iterations"] for pus in self.data[key].values()
]
return equilibration_lengths
def production_iterations(self) -> dict[str, list[float]]:
"""
Get the number of production iterations for each simulation.
Returns the number of uncorrelated production samples for each
repeat of the calculation.
Returns
-------
production_lengths : dict[str, list[float]]
Dictionary keyed `complex` and `solvent` for each leg of the
thermodynamic cycle, with lists with the number
of production iterations for each repeat of that simulation
type.
"""
production_lengths: dict[str, list[float]] = {}
for key in ["complex", "solvent"]:
production_lengths[key] = [
pus[0].outputs["production_iterations"] for pus in self.data[key].values()
]
return production_lengths
def restraint_geometries(
self,
) -> tuple[list[BoreschRestraintGeometry], list[BoreschRestraintGeometry]]:
"""
Get a list of the restraint geometries for the
complex simulations. These define the atoms that have
been restrained in the system.
Returns
-------
geometry_A : list[dict[str, Any]]
A list of dictionaries containing the details of the atoms
in the system that are involved in the restraint of ligand A.
geometry_B : list[dict[str, Any]]
A list of dictionaries containing the details of the atoms
in the system that are involved in the restraint of ligand B.
"""
geometry_A = [
BoreschRestraintGeometry.model_validate(pus[0].outputs["restraint_geometry_A"])
for pus in self.data["complex"].values()
]
geometry_B = [
BoreschRestraintGeometry.model_validate(pus[0].outputs["restraint_geometry_B"])
for pus in self.data["complex"].values()
]
return geometry_A, geometry_B
def selection_indices(self) -> dict[str, list[Optional[npt.NDArray]]]:
"""
Get the system selection indices used to write PDB and
trajectory files.
Returns
-------
indices : dict[str, list[npt.NDArray]]
A dictionary keyed as `complex` and `solvent` for each
state, each containing a list of NDArrays containing the corresponding
full system atom indices for each atom written in the production
trajectory files for each replica.
"""
indices: dict[str, list[Optional[npt.NDArray]]] = {}
for key in ["complex", "solvent"]:
indices[key] = []
for pus in self.data[key].values():
indices[key].append(pus[0].outputs["selection_indices"])
return indices
================================================
FILE: src/openfe/protocols/openmm_septop/septop_units.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
r"""OpenMM Equilibrium SepTop RBFE Protocol Units
================================================
This module implements the :class:`gufe.ProtocolUnit`\s for the
Separated Topologies RBFE protocol.
"""
from __future__ import annotations
import copy
import itertools
import logging
import pathlib
from typing import Any
import MDAnalysis as mda
import mdtraj as md
import numpy as np
import openmm
import openmm.unit
import openmm.unit as omm_units
from gufe import (
SmallMoleculeComponent,
SolvatedPDBComponent,
SolventComponent,
)
from gufe.settings import SettingsBaseModel
from MDAnalysis.coordinates.memory import MemoryReader
from openff.toolkit.topology import Molecule as OFFMolecule
from openff.units import Quantity
from openff.units.openmm import from_openmm, to_openmm
from openmmtools.states import ThermodynamicState
from rdkit import Chem
from openfe.protocols.openmm_utils import omm_compute
from openfe.protocols.openmm_utils.serialization import serialize
from openfe.protocols.restraint_utils import geometry
from openfe.protocols.restraint_utils.geometry.boresch import BoreschRestraintGeometry
from openfe.protocols.restraint_utils.openmm import omm_restraints
from openfe.protocols.restraint_utils.openmm.omm_restraints import (
BoreschRestraint,
add_force_in_separate_group,
)
from ..openmm_utils import (
settings_validation,
system_validation,
)
from ..openmm_utils.mdtraj_utils import mdtraj_from_openmm
from ..restraint_utils.settings import (
BoreschRestraintSettings,
DistanceRestraintSettings,
)
from .base_units import (
BaseSepTopAnalysisUnit,
BaseSepTopRunUnit,
BaseSepTopSetupUnit,
_pre_equilibrate,
)
logger = logging.getLogger(__name__)
class SepTopComplexMixin:
    """
    A mixin to get the components and the settings for the Complex Units.

    Both methods read from ``self._inputs``, which the class this is mixed
    into is expected to provide.
    """

    def _get_components(self):
        """
        Get the relevant components for a complex transformation.

        Returns
        -------
        alchem_comps : dict[str, Component]
            The ``alchemical_components`` input; its ``stateB`` entry
            supplies the extra ligands added to ``small_mols``.
        solv_comp : SolventComponent
            The solvent component of state A, or the protein component
            itself when that is a ``SolvatedPDBComponent``.
        prot_comp : ProteinComponent | None
            The protein component of the system, if it exists.
        small_mols : dict[SmallMoleculeComponent: OFFMolecule]
            SmallMoleculeComponents to add to the system: those of
            state A followed by the alchemical ones of state B.
        """
        state_A = self._inputs["stateA"]
        alchem_comps = self._inputs["alchemical_components"]

        solv_comp, prot_comp, smcs_A = system_validation.get_components(state_A)

        # state A molecules first, then the alchemical molecules that
        # only exist in state B
        small_mols = {
            **{smc: smc.to_openff() for smc in smcs_A},
            **{smc: smc.to_openff() for smc in alchem_comps["stateB"]},
        }

        # If there is a SolvatedPDBComponent, we set the solv_comp in the
        # complex to that, as the SolventComponent is only used in the solvent leg
        if isinstance(prot_comp, SolvatedPDBComponent):
            solv_comp = prot_comp

        return alchem_comps, solv_comp, prot_comp, small_mols

    def _get_settings(self) -> dict[str, SettingsBaseModel]:
        """
        Extract the relevant settings for a complex transformation.

        The timestep is validated against the hydrogen mass before the
        settings are returned.

        Returns
        -------
        settings : dict[str, SettingsBaseModel]
            A dictionary with the following entries:
            * forcefield_settings : OpenMMSystemGeneratorFFSettings
            * thermo_settings : ThermoSettings
            * charge_settings : OpenFFPartialChargeSettings
            * solvation_settings : OpenMMSolvationSettings
            * alchemical_settings : AlchemicalSettings
            * lambda_settings : LambdaSettings
            * engine_settings : OpenMMEngineSettings
            * integrator_settings : IntegratorSettings
            * equil_simulation_settings : MDSimulationSettings
            * equil_output_settings : SepTopEquilOutputSettings
            * simulation_settings : SimulationSettings
            * output_settings: MultiStateOutputSettings
            * restraint_settings: BoreschRestraintSettings
        """
        prot_settings = self._inputs["protocol"].settings  # type: ignore

        # key in the returned dict -> attribute on the protocol settings
        attribute_for_key = {
            "forcefield_settings": "forcefield_settings",
            "thermo_settings": "thermo_settings",
            "charge_settings": "partial_charge_settings",
            "solvation_settings": "complex_solvation_settings",
            "alchemical_settings": "alchemical_settings",
            "lambda_settings": "complex_lambda_settings",
            "engine_settings": "engine_settings",
            "integrator_settings": "complex_integrator_settings",
            "equil_simulation_settings": "complex_equil_simulation_settings",
            "equil_output_settings": "complex_equil_output_settings",
            "simulation_settings": "complex_simulation_settings",
            "output_settings": "complex_output_settings",
            "restraint_settings": "complex_restraint_settings",
        }
        settings = {
            key: getattr(prot_settings, attribute)
            for key, attribute in attribute_for_key.items()
        }

        settings_validation.validate_timestep(
            settings["forcefield_settings"].hydrogen_mass,
            settings["integrator_settings"].timestep,
        )

        return settings
class SepTopSolventMixin:
    """
    A mixin to get the components and the settings for the Solvent Units.

    Both methods read from ``self._inputs``, which the class this is mixed
    into is expected to provide.
    """

    def _get_components(self):
        """
        Get the relevant components for a solvent transformation.

        Note
        -----
        The solvent portion of the transformation is the transformation of one
        ligand into the other in the solvent. The only thing that
        should be present is the alchemical species in state A and state B
        and the SolventComponent.

        Returns
        -------
        alchem_comps : dict[str, Component]
            The ``alchemical_components`` input; its ``stateA`` and
            ``stateB`` entries supply ``small_mols``.
        solv_comp : SolventComponent
            The SolventComponent of the system
        prot_comp : None
            Always ``None``: no protein is returned for the solvent leg.
        small_mols : dict[SmallMoleculeComponent: OFFMolecule]
            The alchemical SmallMoleculeComponents of state A followed by
            those of state B.
        """
        state_A = self._inputs["stateA"]
        alchem_comps = self._inputs["alchemical_components"]

        # only the alchemical ligands of both end states are kept
        small_mols = {
            **{smc: smc.to_openff() for smc in alchem_comps["stateA"]},
            **{smc: smc.to_openff() for smc in alchem_comps["stateB"]},
        }

        # any other component found in state A is discarded
        solv_comp, _, _ = system_validation.get_components(state_A)

        return alchem_comps, solv_comp, None, small_mols

    def _get_settings(self) -> dict[str, SettingsBaseModel]:
        """
        Extract the relevant settings for a solvent transformation.

        The timestep is validated against the hydrogen mass before the
        settings are returned.

        Returns
        -------
        settings : dict[str, SettingsBaseModel]
            A dictionary with the following entries:
            * forcefield_settings : OpenMMSystemGeneratorFFSettings
            * thermo_settings : ThermoSettings
            * charge_settings : OpenFFPartialChargeSettings
            * solvation_settings : OpenMMSolvationSettings
            * alchemical_settings : AlchemicalSettings
            * lambda_settings : LambdaSettings
            * engine_settings : OpenMMEngineSettings
            * integrator_settings : IntegratorSettings
            * equil_simulation_settings : MDSimulationSettings
            * equil_output_settings : SepTopEquilOutputSettings
            * simulation_settings : MultiStateSimulationSettings
            * output_settings: MultiStateOutputSettings
            * restraint_settings: BaseRestraintsSettings
        """
        prot_settings = self._inputs["protocol"].settings  # type: ignore

        # key in the returned dict -> attribute on the protocol settings
        attribute_for_key = {
            "forcefield_settings": "forcefield_settings",
            "thermo_settings": "thermo_settings",
            "charge_settings": "partial_charge_settings",
            "solvation_settings": "solvent_solvation_settings",
            "alchemical_settings": "alchemical_settings",
            "lambda_settings": "solvent_lambda_settings",
            "engine_settings": "engine_settings",
            "integrator_settings": "solvent_integrator_settings",
            "equil_simulation_settings": "solvent_equil_simulation_settings",
            "equil_output_settings": "solvent_equil_output_settings",
            "simulation_settings": "solvent_simulation_settings",
            "output_settings": "solvent_output_settings",
            "restraint_settings": "solvent_restraint_settings",
        }
        settings = {
            key: getattr(prot_settings, attribute)
            for key, attribute in attribute_for_key.items()
        }

        settings_validation.validate_timestep(
            settings["forcefield_settings"].hydrogen_mass,
            settings["integrator_settings"].timestep,
        )

        return settings
class SepTopComplexSetupUnit(SepTopComplexMixin, BaseSepTopSetupUnit):
"""
Protocol Unit for the complex phase of a SepTop free energy calculation
"""
simtype = "complex"
def get_system_AB(
    self,
    solv_comp: SolventComponent,
    system_modeller_A: openmm.app.Modeller,
    smc_comps_AB: dict[SmallMoleculeComponent, OFFMolecule],
    smc_off_B: dict[SmallMoleculeComponent, OFFMolecule],
    settings: dict[str, SettingsBaseModel],
):
    """
    Build the OpenMM objects for a complex holding the protein and both
    ligands.

    Ligand B is modelled on its own and then added to a copy of the
    modeller of complex A (solvated protein-ligand A complex).

    Parameters
    ----------
    solv_comp: SolventComponent
      The SolventComponent
    system_modeller_A: openmm.app.Modeller
      The modeller of complex A that ligand B is inserted into.
    smc_comps_AB: dict[SmallMoleculeComponent,OFFMolecule]
      The dictionary of all SmallMoleculeComponents in the system.
    smc_off_B: dict[SmallMoleculeComponent,OFFMolecule]
      The dictionary of the SmallMoleculeComponent and OFF Molecule of
      ligand B
    settings: dict[str, SettingsBaseModel]
      A dictionary of settings objects for the unit.

    Returns
    -------
    omm_system_AB: openmm.System
    omm_topology_AB: openmm.app.Topology
    positions_AB: openmm.unit.Quantity
    system_modeller_AB: openmm.app.Modeller
    """
    generator = self._get_system_generator(settings, solv_comp)

    # A modeller holding nothing but ligand B; the second return value
    # is not needed here
    ligand_B_modeller, _ = self._get_modeller(
        None,
        None,
        smc_off_B,
        generator,
        settings["solvation_settings"],
    )

    # Start from the modeller of system A so that every water/ion stays
    # in the same location, then append ligand B
    system_modeller_AB = copy.copy(system_modeller_A)
    system_modeller_AB.add(ligand_B_modeller.topology, ligand_B_modeller.positions)

    all_off_mols = list(smc_comps_AB.values())
    omm_topology_AB, omm_system_AB, positions_AB = self._get_omm_objects(
        system_modeller_AB, generator, all_off_mols
    )

    return omm_system_AB, omm_topology_AB, positions_AB, system_modeller_AB
@staticmethod
def _get_selection_atom_indices(
traj: md.Trajectory,
selection: str = "backbone",
):
"""
Get the atom indices of a MDTraj object, given a selection string.
Parameters
----------
traj: md.Trajectory
The Mdtraj trajectory for which to get the atom indices.
selection: str
The selection string. Default: 'backbone'
Returns
-------
indices: list
The list of atom indices that satisfy the selection string.
Raises
------
ValueError
If less than three atom indices are found for the selection string.
"""
indices = traj.topology.select(selection)
if len(indices) < 3:
errmsg = (
f"Less than 3 ({len(indices)} backbone atoms were found For "
"complex A. No alignment of structures is possible."
"Currently only proteins are supported as hosts."
)
raise ValueError(errmsg)
return indices
@staticmethod
def _update_positions(
    omm_topology_A: openmm.app.Topology,
    omm_topology_B: openmm.app.Topology,
    positions_A: openmm.unit.Quantity,
    positions_B: openmm.unit.Quantity,
) -> openmm.unit.Quantity:
    """
    Superpose complex B onto complex A and return the moved positions
    of complex B.

    Parameters
    ----------
    omm_topology_A: openmm.app.Topology
      OpenMM topology from complex A
    omm_topology_B: openmm.app.Topology
      OpenMM topology from complex B
    positions_A: openmm.unit.Quantity
      Positions of the system in state A
    positions_B: openmm.unit.Quantity
      Positions of the system in state B

    Returns
    -------
    updated_positions_B: openmm.unit.Quantity
      Updated positions of the complex B

    Raises
    ------
    ValueError
      If fewer than three alignment atoms are found in complex A.
    """
    reference = mdtraj_from_openmm(omm_topology_A, positions_A)
    mobile = mdtraj_from_openmm(omm_topology_B, positions_B)

    # The alignment atoms (default selection) are picked from complex A.
    # NOTE(review): the same indices are applied to complex B, which
    # presumably relies on both complexes sharing their atom order for
    # the selected atoms - confirm.
    alignment_indices = SepTopComplexSetupUnit._get_selection_atom_indices(reference)

    mobile_imaged = mobile.image_molecules()
    mobile_imaged.superpose(reference, atom_indices=alignment_indices)

    # Extract updated system positions from the last frame.
    return mobile_imaged.openmm_positions(-1)
@staticmethod
def _get_mda_universe(
    topology: openmm.app.Topology,
    positions: openmm.unit.Quantity,
    trajectory: pathlib.Path | None,
    settings: dict[str, SettingsBaseModel],
) -> mda.Universe:
    """
    Helper method to get a Universe from an openmm Topology,
    and either an input trajectory or a set of positions.

    Parameters
    ----------
    topology : openmm.app.Topology
      An OpenMM Topology that defines the System.
    positions: openmm.unit.Quantity
      The System's current positions.
      Used if a trajectory file is None or is not a file, or if the
      trajectory write interval exceeds the production length.
    trajectory: pathlib.Path | None
      A Path to a trajectory file to read positions from.
    settings: dict
      The settings dictionary. ``trajectory_write_interval`` is read from
      its ``equil_output_settings`` entry and ``production_length`` from
      its ``equil_simulation_settings`` entry.

    Returns
    -------
    mda.Universe
      An MDAnalysis Universe of the System.
    """
    write_int = settings["equil_output_settings"].trajectory_write_interval
    prod_length = settings["equil_simulation_settings"].production_length

    # The trajectory is only used when it exists on disk and the write
    # interval fits within the production length
    use_trajectory = (
        trajectory is not None and trajectory.is_file() and write_int <= prod_length
    )

    if use_trajectory:
        return mda.Universe(
            topology,
            trajectory,
            topology_format="OPENMMTOPOLOGY",
        )

    # The in-memory coordinates are supplied in angstroms. Converting
    # through the Quantity API stays correct whatever length unit the
    # positions carry and avoids reaching into the private ``_value``.
    coordinates = np.array(positions.value_in_unit(openmm.unit.angstrom))

    return mda.Universe(
        topology,
        coordinates,
        topology_format="OPENMMTOPOLOGY",
        trajectory_format=MemoryReader,
    )
@staticmethod
def _get_boresch_restraint(
    universe: mda.Universe,
    guest_rdmol: Chem.Mol,
    guest_atom_ids: list[int],
    host_atom_ids: list[int],
    temperature: Quantity,
    settings: BoreschRestraintSettings,
) -> tuple[BoreschRestraintGeometry, BoreschRestraint]:
    """
    Get a Boresch-like restraint Geometry and OpenMM restraint force
    supplier.

    Parameters
    ----------
    universe : mda.Universe
      An MDAnalysis Universe defining the system to get the restraint for.
    guest_rdmol : Chem.Mol
      An RDKit Molecule defining the guest molecule in the system.
    guest_atom_ids: list[int]
      A list of atom indices defining the guest molecule in the universe.
    host_atom_ids : list[int]
      A list of atom indices defining the host molecules in the universe.
    temperature : unit.Quantity
      The temperature of the simulation where the restraint will be added.
    settings : BoreschRestraintSettings
      Settings on how the Boresch-like restraint should be defined.

    Returns
    -------
    geom : BoreschRestraintGeometry
      A class defining the Boresch-like restraint.
    restraint : BoreschRestraint
      A factory class for generating Boresch restraints in OpenMM.
    """
    # the smaller of the two theta force constants is the one passed on
    # to the restraint search
    weakest_angle_k = min(settings.K_thetaA, settings.K_thetaB)

    search_kwargs = dict(
        universe=universe,
        guest_rdmol=guest_rdmol,
        guest_idxs=guest_atom_ids,
        host_idxs=host_atom_ids,
        host_selection=settings.host_selection,
        anchor_finding_strategy=settings.anchor_finding_strategy,
        dssp_filter=settings.dssp_filter,
        rmsf_cutoff=settings.rmsf_cutoff,
        host_min_distance=settings.host_min_distance,
        host_max_distance=settings.host_max_distance,
        angle_force_constant=weakest_angle_k,
        temperature=temperature,
    )
    geom = geometry.boresch.find_boresch_restraint(**search_kwargs)

    return geom, omm_restraints.BoreschRestraint(settings)
def _add_restraints(
    self,
    system: openmm.System,
    topology_A: openmm.app.Topology,
    topology_B: openmm.app.Topology,
    positions_A: openmm.unit.Quantity,
    positions_B: openmm.unit.Quantity,
    mol_A: SmallMoleculeComponent,
    mol_B: SmallMoleculeComponent,
    ligand_A_inxs: list[int],
    ligand_B_inxs: list[int],
    ligand_B_inxs_B: list[int],
    protein_inxs: list[int],
    settings: dict[str, SettingsBaseModel],
) -> tuple[
    Quantity,
    Quantity,
    openmm.System,
    geometry.HostGuestRestraintGeometry,
    geometry.HostGuestRestraintGeometry,
]:
    """
    Adds Boresch restraints to the system.

    A Boresch-like restraint is searched for independently for ligand A
    (in the state A universe) and for ligand B (in the state B universe).
    Both restraints are then added to ``system``, controlled by the
    ``lambda_restraints_A`` and ``lambda_restraints_B`` parameters.

    Parameters
    ----------
    system: openmm.System
        The OpenMM system where the restraints will be applied to.
    topology_A: openmm.app.Topology
        The OpenMM topology that defines the system A
    topology_B: openmm.app.Topology
        The OpenMM topology that defines the system B
    positions_A: openmm.unit.Quantity
        Positions of the system A. This could be a single set of positions,
        or a full trajectory.
    positions_B: openmm.unit.Quantity
        Positions of the system B. This could be a single set of positions,
        or a full trajectory.
    mol_A: SmallMoleculeComponent
        The SmallMoleculeComponent of ligand A
    mol_B: SmallMoleculeComponent
        The SmallMoleculeComponent of ligand B
    ligand_A_inxs: list[int]
        Atom indices of ligand A. They are used both for the restraint
        search in the state A universe and, unchanged, for the restraint
        added to ``system``.
    ligand_B_inxs: list[int]
        Atom indices of ligand B in ``system``. The restraint atoms found
        in state B are remapped onto these indices.
    ligand_B_inxs_B: list[int]
        Atom indices of ligand B in the state B system (``topology_B``),
        used for the restraint search in the state B universe.
    protein_inxs: list[int]
        Atom indices from the protein atoms. The same indices are used for
        the state A and the state B restraint search.
    settings: dict[str, SettingsBaseModel]
        The settings dict

    Returns
    -------
    correction_A: unit.Quantity
        The standard state correction for the restraint for ligand A.
    correction_B: unit.Quantity
        The standard state correction for the restraint for ligand B
        (sign-flipped, see the comment in the body).
    restrained_system: openmm.System
        The OpenMM system with the added restraints forces
    rest_geom_A: geometry.HostGuestRestraintGeometry
        The restraint Geometry object for ligand A.
    rest_geom_B: geometry.HostGuestRestraintGeometry
        The restraint Geometry object for ligand B, with its guest atoms
        expressed as indices of ``system``.
    """
    # Get the MDA Universe for the restraints selection
    # We try to pass the equilibration production file path through
    # In some cases (debugging / dry runs) this won't be available
    # so we'll default to using input positions.
    out_traj = (
        self.shared_basepath / settings["equil_output_settings"].production_trajectory_filename
    )
    u_A = self._get_mda_universe(
        topology_A,
        positions_A,
        pathlib.Path(f"{out_traj}_stateA.xtc"),
        settings,
    )
    u_B = self._get_mda_universe(
        topology_B,
        positions_B,
        pathlib.Path(f"{out_traj}_stateB.xtc"),
        settings,
    )

    rdmol_A = mol_A.to_rdkit()
    rdmol_B = mol_B.to_rdkit()
    Chem.SanitizeMol(rdmol_A)
    Chem.SanitizeMol(rdmol_B)

    temperature = settings["thermo_settings"].temperature
    restraint_settings = settings["restraint_settings"]

    rest_geom_A, restraint_A = self._get_boresch_restraint(
        u_A,
        rdmol_A,
        ligand_A_inxs,
        protein_inxs,
        temperature,
        restraint_settings,
    )
    rest_geom_B, restraint_B = self._get_boresch_restraint(
        u_B,
        rdmol_B,
        ligand_B_inxs_B,
        protein_inxs,
        temperature,
        restraint_settings,
    )

    # We have to update the indices for ligand B to match the AB complex.
    # The mapping is positional: the n-th entry of ``ligand_B_inxs_B`` is
    # replaced by the n-th entry of ``ligand_B_inxs``, so both lists must
    # enumerate ligand B's atoms in the same order.
    new_boresch_B_indices = [ligand_B_inxs_B.index(i) for i in rest_geom_B.guest_atoms]
    rest_geom_B.guest_atoms = [ligand_B_inxs[i] for i in new_boresch_B_indices]

    if self.verbose:
        self.logger.info(
            f"restraint geometry is: ligand A: {rest_geom_A} and ligand B: {rest_geom_B}."
        )

    # We need a temporary thermodynamic state to add the restraint
    # & get the correction
    thermodynamic_state = ThermodynamicState(
        system,
        temperature=to_openmm(temperature),
        pressure=to_openmm(settings["thermo_settings"].pressure),
    )

    # Add the force to the thermodynamic state
    restraint_A.add_force(
        thermodynamic_state,
        rest_geom_A,
        controlling_parameter_name="lambda_restraints_A",
    )
    restraint_B.add_force(
        thermodynamic_state,
        rest_geom_B,
        controlling_parameter_name="lambda_restraints_B",
    )

    # Get the standard state correction as a unit.Quantity
    correction_A = restraint_A.get_standard_state_correction(
        thermodynamic_state,
        rest_geom_A,
    )
    correction_B = restraint_B.get_standard_state_correction(
        thermodynamic_state,
        rest_geom_B,
    )

    # Multiply the correction for ligand B by -1 as for this ligands,
    # Boresch restraint has to be turned on in the analytical corr.
    correction_B = -correction_B  # type: ignore[operator]

    # Get the system
    # Note: you have to remove the thermostat, otherwise you end up
    # with an Andersen thermostat by default!
    restrained_system = thermodynamic_state.get_system(remove_thermostat=True)

    return (
        correction_A,
        correction_B,
        restrained_system,
        rest_geom_A,
        rest_geom_B,
    )
def run(
    self,
    dry=False,
    verbose=True,
    scratch_basepath=None,
    shared_basepath=None,
) -> dict[str, Any]:
    """
    Set up the complex phase of the SepTop free energy calculation.

    Builds the OpenMM systems for state A, state B and the combined AB
    system, pre-equilibrates them, creates the alchemical system, adds
    the Boresch restraints and writes the serialized system and the
    topology PDB to the shared directory.

    Parameters
    ----------
    dry : bool
        Do a dry run of the calculation, creating all necessary alchemical
        system components (topology, system, sampler, etc...) but without
        running the simulation, default False
    verbose : bool
        Verbose output of the simulation progress. Output is provided via
        INFO level logging, default True
    scratch_basepath : pathlib.Path
        Path to the scratch (temporary) directory space.
    shared_basepath : pathlib.Path
        Path to the shared (persistent) directory space.

    Returns
    -------
    dict
        If ``dry`` is False: the paths of the serialized system and
        topology, the standard state corrections (kcal/mol) and restraint
        geometries of both ligands, the selection indices and the
        subsampled PDB path (``None`` if the selection is empty).
        If ``dry`` is True: the system and topology paths plus the
        intermediate OpenMM objects, for testing.
    """
    # 0. General preparation tasks
    self._prepare(verbose, scratch_basepath, shared_basepath)

    self.logger.info("Setting up SepTop complex system.")

    # 1. Get components
    self.logger.info("Creating and setting up the OpenMM systems")
    alchem_comps, solv_comp, prot_comp, smc_comps = self._get_components()
    smc_comps_A, smc_comps_B, smc_comps_AB = self.get_smc_comps(alchem_comps, smc_comps)

    # 2. Get settings
    settings = self._get_settings()

    # 3. Assign partial charges
    self._assign_partial_charges(settings["charge_settings"], smc_comps_AB)

    # 4. Get the OpenMM systems
    omm_system_A, omm_topology_A, positions_A, modeller_A, comp_resids_A = (
        self.get_system(
            solv_comp,
            prot_comp,
            smc_comps_A,
            settings,
        )
    )  # fmt: skip

    omm_system_B, omm_topology_B, positions_B, modeller_B, comp_resids_B = (
        self.get_system(
            solv_comp,
            prot_comp,
            smc_comps_B,
            settings,
        )
    )  # fmt: skip

    # Components that only exist in state B are added on top of system A
    # to build the combined AB system.
    smc_B_unique_keys = smc_comps_B.keys() - smc_comps_A.keys()
    smc_comp_B_unique = {key: smc_comps_B[key] for key in smc_B_unique_keys}
    omm_system_AB, omm_topology_AB, positions_AB, modeller_AB = self.get_system_AB(
        solv_comp,
        modeller_A,
        smc_comps_AB,
        smc_comp_B_unique,
        settings,
    )

    # Get the comp_resids of the AB system: every residue of AB that is
    # not already accounted for in system A is assigned to ligand B.
    resids_A = list(itertools.chain(*comp_resids_A.values()))
    resids_AB = [r.index for r in modeller_AB.topology.residues()]
    diff_resids = list(set(resids_AB) - set(resids_A))
    comp_resids_AB = comp_resids_A | {alchem_comps["stateB"][0]: np.array(diff_resids)}

    # 5. Pre-equilibrate System (for restraint selection)
    platform = omm_compute.get_openmm_platform(
        platform_name=settings["engine_settings"].compute_platform,
        gpu_device_index=settings["engine_settings"].gpu_device_index,
        restrict_cpu_count=False,
    )

    self.logger.info("Pre-equilibrating the systems")
    equil_positions_A, box_A = _pre_equilibrate(
        system=omm_system_A,
        topology=omm_topology_A,
        positions=positions_A,
        settings=settings,
        endstate="A",
        dry=dry,
        shared_basepath=self.shared_basepath,
        platform=platform,
        verbose=self.verbose,
        logger=self.logger,
    )
    equil_positions_B, box_B = _pre_equilibrate(
        system=omm_system_B,
        topology=omm_topology_B,
        positions=positions_B,
        settings=settings,
        endstate="B",
        dry=dry,
        shared_basepath=self.shared_basepath,
        platform=platform,
        verbose=self.verbose,
        logger=self.logger,
    )

    # 6. Get all the right atom indices for alignments
    comp_atomids_A = self._get_atom_indices(omm_topology_A, comp_resids_A)
    all_atom_ids_A = list(itertools.chain(*comp_atomids_A.values()))
    comp_atomids_B = self._get_atom_indices(omm_topology_B, comp_resids_B)

    # Get the atom indices of ligand B in system B
    atom_indices_B = comp_atomids_B[alchem_comps["stateB"][0]]

    # 7. Update the positions of system B: Align protein
    updated_positions_B = self._update_positions(
        omm_topology_A,
        omm_topology_B,
        equil_positions_A,
        equil_positions_B,
    )

    # Get atom indices for ligand A and ligand B and the solvent in the
    # system AB
    comp_atomids_AB = self._get_atom_indices(omm_topology_AB, comp_resids_AB)
    atom_indices_AB_B = comp_atomids_AB[alchem_comps["stateB"][0]]
    atom_indices_AB_A = comp_atomids_AB[alchem_comps["stateA"][0]]

    # Update positions from AB system.
    # The slices run from the first to the last listed index, i.e. each
    # index list is treated as one contiguous, ordered range.
    positions_AB[all_atom_ids_A[0] : all_atom_ids_A[-1] + 1, :] = equil_positions_A
    positions_AB[atom_indices_AB_B[0] : atom_indices_AB_B[-1] + 1, :] = updated_positions_B[
        atom_indices_B[0] : atom_indices_B[-1] + 1
    ]

    # 8. Create the alchemical system
    self.logger.info("Creating the alchemical system and applying restraints")
    alchemical_factory, alchemical_system = self._get_alchemical_system(
        omm_system_AB,
        atom_indices_AB_A,
        atom_indices_AB_B,
        settings["alchemical_settings"],
    )

    # 9. Apply Restraints
    corr_A, corr_B, system, restraint_geom_A, restraint_geom_B = self._add_restraints(
        alchemical_system,
        omm_topology_A,
        omm_topology_B,
        equil_positions_A,
        equil_positions_B,
        alchem_comps["stateA"][0],
        alchem_comps["stateB"][0],
        atom_indices_AB_A,
        atom_indices_AB_B,
        atom_indices_B,
        comp_atomids_AB[prot_comp],
        settings,
    )

    equil_positions_AB, box_AB = _pre_equilibrate(
        system=system,
        topology=omm_topology_AB,
        positions=positions_AB,
        settings=settings,
        endstate="AB",
        dry=dry,
        platform=platform,
        shared_basepath=self.shared_basepath,
        verbose=self.verbose,
        logger=self.logger,
    )

    # Update box vectors
    omm_topology_AB.setPeriodicBoxVectors(box_AB)

    # Subselect system based on user inputs & write initial subsampled PDB
    sub_pdb_structure = self.shared_basepath / settings["output_settings"].output_structure
    selection_indices = self._subsample_topology(
        topology=omm_topology_AB,
        positions=positions_AB,
        output_selection=settings["output_settings"].output_indices,
        output_file=sub_pdb_structure,
    )

    # The subsampled PDB may not have been written if selection_indices == 0
    # Issue #1942 - maybe move this to the method?
    if len(selection_indices) == 0:
        sub_pdb_structure = None

    # Serialize the system and PDB topology
    system_outfile = self.shared_basepath / "system.xml.bz2"
    serialize(system, system_outfile)

    topology_file = self.shared_basepath / "topology.pdb"
    # Use a context manager so the file handle is flushed and closed as
    # soon as the PDB has been written, instead of being left open.
    with open(topology_file, "w") as pdb_handle:
        openmm.app.pdbfile.PDBFile.writeFile(
            omm_topology_AB,
            equil_positions_AB,
            pdb_handle,
        )

    if not dry:
        return {
            "system": system_outfile,
            "topology": topology_file,
            "standard_state_correction_A": corr_A.to("kilocalorie_per_mole"),
            "standard_state_correction_B": corr_B.to("kilocalorie_per_mole"),
            "restraint_geometry_A": restraint_geom_A.model_dump(),
            "restraint_geometry_B": restraint_geom_B.model_dump(),
            "selection_indices": selection_indices,
            "subsampled_pdb_structure": sub_pdb_structure,
        }
    else:
        return {
            # Add in various objects we can use to test the system
            "system": system_outfile,
            "topology": topology_file,
            "system_A": omm_system_A,
            "system_B": omm_system_B,
            "system_AB": omm_system_AB,
            "alchem_restrained_system": system,
            "alchem_system": alchemical_system,
            "alchem_factory": alchemical_factory,
            "positions": equil_positions_AB,
            "selection_indices": selection_indices,
            "subsampled_pdb_structure": sub_pdb_structure,
        }
class SepTopSolventSetupUnit(SepTopSolventMixin, BaseSepTopSetupUnit):
    """
    Protocol Unit for the solvent phase of a relative SepTop free energy
    """

    simtype = "solvent"

    @staticmethod
    def _update_positions(
        mol_A: SmallMoleculeComponent,
        mol_B: SmallMoleculeComponent,
    ) -> SmallMoleculeComponent:
        """
        Computes the amount to offset the second ligand by in the solution
        phase during RBFE calculations and applies the offset to the ligand,
        returning the SmallMoleculeComponent with the updated positions.

        Parameters
        ----------
        mol_A: SmallMoleculeComponent
            The SmallMoleculeComponent of ligand A
        mol_B: SmallMoleculeComponent
            The SmallMoleculeComponent of ligand B

        Returns
        -------
        updated_mol_B: SmallMoleculeComponent
            The SmallMoleculeComponent of ligand B after updating its positions
            to be a certain distance away from ligand A
        """
        # Convert SmallMolecule to Rdkit Molecule
        rdmol_A = mol_A.to_rdkit()
        rdmol_B = mol_B.to_rdkit()

        # Offset ligand B from ligand A in the solvent.
        # Only the first conformer of each molecule is used.
        pos_ligandA = rdmol_A.GetConformers()[0].GetPositions()
        pos_ligandB = rdmol_B.GetConformers()[0].GetPositions()

        # "Radius" = largest distance of any atom from the mean atom position
        ligand_1_radius = np.linalg.norm(pos_ligandA - pos_ligandA.mean(axis=0), axis=1).max()
        ligand_2_radius = np.linalg.norm(pos_ligandB - pos_ligandB.mean(axis=0), axis=1).max()
        ligand_distance = (ligand_1_radius + ligand_2_radius) * 1.5

        # First move the centre of B onto the centre of A, then shift it
        # along the first (x) axis by ``ligand_distance``.
        ligand_offset = pos_ligandA.mean(0) - pos_ligandB.mean(0)
        ligand_offset[0] += ligand_distance

        # Offset the ligandB.
        pos_ligandB += ligand_offset

        # Extract updated system positions.
        rdmol_B.GetConformers()[0].SetPositions(pos_ligandB)
        updated_mol_B = SmallMoleculeComponent(rdmol_B)

        return updated_mol_B

    def _add_restraints(
        self,
        system: openmm.System,
        ligand_1: Chem.rdchem.Mol,
        ligand_2: Chem.rdchem.Mol,
        ligand_1_inxs: list[int],
        ligand_2_inxs: list[int],
        settings: dict[str, SettingsBaseModel],
        positions_AB: openmm.unit.Quantity,
    ) -> tuple[
        Quantity,
        openmm.System,
    ]:
        """
        Apply the distance restraint between the ligands.

        Parameters
        ----------
        system: openmm.System
            The OpenMM system where the restraints will be applied to.
        ligand_1: Chem.rdchem.Mol
            The RDKit Molecule of ligand A
        ligand_2: Chem.rdchem.Mol
            The RDKit Molecule of ligand B
        ligand_1_inxs: list[int]
            Atom indices from the ligand A in the system.
        ligand_2_inxs: list[int]
            Atom indices from the ligand B in the system.
        settings: dict[str, SettingsBaseModel]
            The settings dict
        positions_AB: openmm.unit.Quantity
            The positions of the OpenMM system

        Returns
        -------
        correction: unit.Quantity
            Standard state correction for the harmonic distance restraint.
            This is always zero, see the comment in the body.
        system: openmm.System
            The OpenMM system with the added restraints forces

        Raises
        ------
        NotImplementedError
            If ``settings["restraint_settings"]`` is not a
            ``DistanceRestraintSettings``.
        """
        if isinstance(settings["restraint_settings"], DistanceRestraintSettings):
            rest_geom = geometry.harmonic.get_molecule_centers_restraint(
                molA_rdmol=ligand_1,
                molB_rdmol=ligand_2,
                molA_idxs=ligand_1_inxs,
                molB_idxs=ligand_2_inxs,
            )
        else:
            # TODO turn this into a direction for different restraint types supported?
            raise NotImplementedError("Other restraint types are not yet available")

        if self.verbose:
            self.logger.info(f"restraint geometry is: {rest_geom}")

        # The current separation of the two restrained atoms is used as the
        # length of the harmonic bond added below.
        distance = np.linalg.norm(
            positions_AB[rest_geom.guest_atoms[0]] - positions_AB[rest_geom.host_atoms[0]]
        )

        k_distance = to_openmm(settings["restraint_settings"].spring_constant)

        force = openmm.HarmonicBondForce()
        force.addBond(
            rest_geom.guest_atoms[0],
            rest_geom.host_atoms[0],
            distance * openmm.unit.nanometers,
            k_distance,
        )
        force.setName("alignment_restraint")

        # Add force to a separate force group
        add_force_in_separate_group(system, force)

        # No correction necessary as only a single harmonic bond is applied between the ligands.
        # Multiplying R*T by 0.0 yields a zero that still carries energy units.
        correction = (
            from_openmm(
                openmm.unit.MOLAR_GAS_CONSTANT_R
                * to_openmm(settings["thermo_settings"].temperature)
            )
            * 0.0
        )

        return correction, system

    def run(
        self, dry=False, verbose=True, scratch_basepath=None, shared_basepath=None
    ) -> dict[str, Any]:
        """
        Set up the solvent phase of the SepTop free energy calculation.

        Parameters
        ----------
        dry : bool
            Do a dry run of the calculation, creating all necessary alchemical
            system components (topology, system, sampler, etc...) but without
            running the simulation, default False
        verbose : bool
            Verbose output of the simulation progress. Output is provided via
            INFO level logging, default True
        scratch_basepath : pathlib.Path
            Path to the scratch (temporary) directory space.
        shared_basepath : pathlib.Path
            Path to the shared (persistent) directory space.

        Returns
        -------
        dict
            If ``dry`` is False: the paths of the serialized system and
            topology, the standard state correction (kcal/mol), the
            selection indices and the subsampled PDB path (``None`` if the
            selection is empty).
            If ``dry`` is True: the system and topology paths plus the
            intermediate OpenMM objects, for testing.
        """
        # 0. General preparation tasks
        self._prepare(verbose, scratch_basepath, shared_basepath)

        self.logger.info("Setting up SepTop solvent system.")

        # 1. Get components
        self.logger.info("Creating and setting up the OpenMM systems")
        alchem_comps, solv_comp, prot_comp, smc_comps = self._get_components()
        smc_comps_A, smc_comps_B, smc_comps_AB = self.get_smc_comps(alchem_comps, smc_comps)

        # 2. Get settings
        settings = self._get_settings()

        # 3. Assign partial charges
        self._assign_partial_charges(settings["charge_settings"], smc_comps_AB)

        # 4. Update the positions of ligand B:
        #    - solvent: Offset ligand B with respect to ligand A
        smc_B = self._update_positions(
            alchem_comps["stateA"][0],
            alchem_comps["stateB"][0],
        )
        # NOTE(review): this OpenFF molecule is created after step 3 from the
        # repositioned component - confirm the assigned charges carry over.
        smc_off_B = {smc_B: smc_B.to_openff()}

        # 5. Get the OpenMM systems
        omm_system_AB, omm_topology_AB, positions_AB, modeller_AB, comp_resids_AB = (
            self.get_system(
                solv_comp,
                prot_comp,
                smc_comps_A | smc_off_B,
                settings,
            )
        )  # fmt: skip

        # 6. Get atom indices for ligand A and ligand B and the solvent in the
        # system AB
        comp_atomids_AB = self._get_atom_indices(omm_topology_AB, comp_resids_AB)
        atom_indices_AB_A = comp_atomids_AB[alchem_comps["stateA"][0]]
        atom_indices_AB_B = comp_atomids_AB[smc_B]

        # 7. Create the alchemical system
        self.logger.info("Creating the alchemical system and applying restraints")
        alchemical_factory, alchemical_system = self._get_alchemical_system(
            omm_system_AB,
            atom_indices_AB_A,
            atom_indices_AB_B,
            settings["alchemical_settings"],
        )

        # 8. Apply Restraints
        rdmol_A = alchem_comps["stateA"][0].to_rdkit()
        rdmol_B = smc_B.to_rdkit()
        Chem.SanitizeMol(rdmol_A)
        Chem.SanitizeMol(rdmol_B)

        corr, system = self._add_restraints(
            alchemical_system,
            rdmol_A,
            rdmol_B,
            atom_indices_AB_A,
            atom_indices_AB_B,
            settings,
            positions_AB,
        )

        # Write the full system PDB.
        # Use a context manager so the file handle is flushed and closed as
        # soon as the PDB has been written, instead of being left open.
        topology_file = self.shared_basepath / "topology.pdb"
        with open(topology_file, "w") as pdb_handle:
            openmm.app.pdbfile.PDBFile.writeFile(omm_topology_AB, positions_AB, pdb_handle)

        # Subselect system based on user inputs & write initial subsampled PDB
        sub_pdb_structure = self.shared_basepath / settings["output_settings"].output_structure
        selection_indices = self._subsample_topology(
            topology=omm_topology_AB,
            positions=positions_AB,
            output_selection=settings["output_settings"].output_indices,
            output_file=sub_pdb_structure,
        )

        # The subsampled PDB may not have been written if selection_indices == 0
        # Issue #1942 - maybe move this to the method?
        if len(selection_indices) == 0:
            sub_pdb_structure = None

        # Serialize the system
        system_outfile = self.shared_basepath / "system.xml.bz2"
        serialize(system, system_outfile)

        if not dry:
            return {
                "system": system_outfile,
                "topology": topology_file,
                "standard_state_correction": corr.to("kilocalorie_per_mole"),
                "selection_indices": selection_indices,
                "subsampled_pdb_structure": sub_pdb_structure,
            }
        else:
            return {
                # Add in various objects we can use to test the system
                "system": system_outfile,
                "topology": topology_file,
                "system_AB": omm_system_AB,
                "alchem_restrained_system": system,
                "alchem_system": alchemical_system,
                "alchem_factory": alchemical_factory,
                "positions": positions_AB,
                "selection_indices": selection_indices,
                "subsampled_pdb_structure": sub_pdb_structure,
            }
class SepTopSolventRunUnit(SepTopSolventMixin, BaseSepTopRunUnit):
    """
    Protocol Unit for the solvent phase of a relative SepTop free energy
    """

    simtype = "solvent"

    def _get_lambda_schedule(
        self, settings: dict[str, SettingsBaseModel]
    ) -> dict[str, list[float]]:
        """
        Build the lambda schedule for the solvent phase.

        Parameters
        ----------
        settings : dict[str, SettingsBaseModel]
            The settings dict; only ``settings["lambda_settings"]`` is read.

        Returns
        -------
        dict[str, list[float]]
            The electrostatics and sterics schedules of ligand A and
            ligand B, each replaced by ``1 - x`` element-wise. No restraint
            schedule is included for the solvent phase.
        """
        lambda_settings = settings["lambda_settings"]

        # (schedule key, name of the settings attribute it is derived from)
        schedule_sources = (
            ("lambda_electrostatics_A", "lambda_elec_A"),
            ("lambda_sterics_A", "lambda_vdw_A"),
            ("lambda_electrostatics_B", "lambda_elec_B"),
            ("lambda_sterics_B", "lambda_vdw_B"),
        )

        # Reverse lambda schedule since in AbsoluteAlchemicalFactory 1
        # means fully interacting, not stateB
        return {
            schedule_key: [1 - value for value in getattr(lambda_settings, attribute)]
            for schedule_key, attribute in schedule_sources
        }
class SepTopComplexRunUnit(SepTopComplexMixin, BaseSepTopRunUnit):
    """
    Protocol Unit for the complex phase of a relative SepTop free energy
    """

    simtype = "complex"

    def _get_lambda_schedule(
        self, settings: dict[str, SettingsBaseModel]
    ) -> dict[str, list[float]]:
        """
        Build the lambda schedule for the complex phase.

        Parameters
        ----------
        settings : dict[str, SettingsBaseModel]
            The settings dict; only ``settings["lambda_settings"]`` is read.

        Returns
        -------
        dict[str, list[float]]
            The electrostatics and sterics schedules of ligand A and
            ligand B, each replaced by ``1 - x`` element-wise, plus the
            two restraint schedules exactly as given in the settings.
        """
        lambda_settings = settings["lambda_settings"]

        def _inverted(values):
            # Reverse lambda schedule since in AbsoluteAlchemicalFactory 1
            # means fully interacting, not stateB
            return [1 - value for value in values]

        schedule = {
            "lambda_electrostatics_A": _inverted(lambda_settings.lambda_elec_A),
            "lambda_sterics_A": _inverted(lambda_settings.lambda_vdw_A),
            "lambda_electrostatics_B": _inverted(lambda_settings.lambda_elec_B),
            "lambda_sterics_B": _inverted(lambda_settings.lambda_vdw_B),
        }

        # The restraint schedules are passed through without inversion.
        schedule["lambda_restraints_A"] = lambda_settings.lambda_restraints_A
        schedule["lambda_restraints_B"] = lambda_settings.lambda_restraints_B
        return schedule
class SepTopSolventAnalysisUnit(SepTopSolventMixin, BaseSepTopAnalysisUnit):
    """
    Protocol Unit for the analysis of the solvent phase of a relative SepTop
    free energy calculation.

    All behaviour is inherited from ``SepTopSolventMixin`` and
    ``BaseSepTopAnalysisUnit``; this class only sets ``simtype``.
    """

    # Label identifying the phase handled by this unit.
    simtype = "solvent"
class SepTopComplexAnalysisUnit(SepTopComplexMixin, BaseSepTopAnalysisUnit):
    """
    Protocol Unit for the analysis of the complex phase of a relative SepTop
    free energy calculation.

    All behaviour is inherited from ``SepTopComplexMixin`` and
    ``BaseSepTopAnalysisUnit``; this class only sets ``simtype``.
    """

    # Label identifying the phase handled by this unit.
    simtype = "complex"
================================================
FILE: src/openfe/protocols/openmm_septop/utils.py
================================================
from openmmtools import states
from openmmtools.states import GlobalParameterState
class SepTopParameterState(GlobalParameterState):
    """
    Composable state to control lambda parameters for two ligands.

    See :class:`openmmtools.states.GlobalParameterState` for more details.

    Parameters
    ----------
    parameters_name_suffix : Optional[str]
        If specified, the state will control a modified version of the parameter
        ``lambda_restraints_{parameters_name_suffix}`` instead of just
        ``lambda_restraints``.
    lambda_sterics_A : Optional[float]
        The value for the vdW interactions for ligand A.
        If defined, must be between 0 and 1.
    lambda_electrostatics_A : Optional[float]
        The value for the electrostatics interactions for ligand A.
        If defined, must be between 0 and 1.
    lambda_restraints_A : Optional[float]
        The strength of the restraint for ligand A.
        If defined, must be between 0 and 1.
    lambda_bonds_A : Optional[float]
        The value for modifying bonds for ligand A.
        If defined, must be between 0 and 1.
    lambda_angles_A : Optional[float]
        The value for modifying angles for ligand A.
        If defined, must be between 0 and 1.
    lambda_torsions_A : Optional[float]
        The value for modifying torsions for ligand A.
        If defined, must be between 0 and 1.
    lambda_sterics_B : Optional[float]
        The value for the vdW interactions for ligand B.
        If defined, must be between 0 and 1.
    lambda_electrostatics_B : Optional[float]
        The value for the electrostatics interactions for ligand B.
        If defined, must be between 0 and 1.
    lambda_restraints_B : Optional[float]
        The strength of the restraint for ligand B.
        If defined, must be between 0 and 1.
    lambda_bonds_B : Optional[float]
        The value for modifying bonds for ligand B.
        If defined, must be between 0 and 1.
    lambda_angles_B : Optional[float]
        The value for modifying angles for ligand B.
        If defined, must be between 0 and 1.
    lambda_torsions_B : Optional[float]
        The value for modifying torsions for ligand B.
        If defined, must be between 0 and 1.
    """

    class _LambdaParameter(states.GlobalParameterState.GlobalParameter):
        """A global parameter in the interval [0, 1] with standard
        value 1."""

        def __init__(self, parameter_name):
            super().__init__(
                parameter_name,
                standard_value=1.0,
                validator=self.lambda_validator,
            )

        # Declared as a staticmethod that still takes ``self`` explicitly:
        # the parameter object is expected to be passed in by the caller.
        @staticmethod
        def lambda_validator(self, instance, parameter_value):
            # ``None`` is passed through untouched.
            if parameter_value is None:
                return parameter_value
            # In-range values are normalised to float.
            if 0.0 <= parameter_value <= 1.0:
                return float(parameter_value)
            raise ValueError(f"{self.parameter_name} must be between 0 and 1.")

    # Lambda parameters for ligand A
    lambda_sterics_A = _LambdaParameter("lambda_sterics_A")
    lambda_electrostatics_A = _LambdaParameter("lambda_electrostatics_A")
    lambda_restraints_A = _LambdaParameter("lambda_restraints_A")
    lambda_bonds_A = _LambdaParameter("lambda_bonds_A")
    lambda_angles_A = _LambdaParameter("lambda_angles_A")
    lambda_torsions_A = _LambdaParameter("lambda_torsions_A")

    # Lambda parameters for ligand B
    lambda_sterics_B = _LambdaParameter("lambda_sterics_B")
    lambda_electrostatics_B = _LambdaParameter("lambda_electrostatics_B")
    lambda_restraints_B = _LambdaParameter("lambda_restraints_B")
    lambda_bonds_B = _LambdaParameter("lambda_bonds_B")
    lambda_angles_B = _LambdaParameter("lambda_angles_B")
    lambda_torsions_B = _LambdaParameter("lambda_torsions_B")
# # Restraints solvent
# lambda_restraints = _LambdaParameter('lambda_restraints')
================================================
FILE: src/openfe/protocols/openmm_utils/__init__.py
================================================
================================================
FILE: src/openfe/protocols/openmm_utils/charge_generation.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""
Reusable utilities for assigning partial charges to ChemicalComponents.
"""
import copy
import sys
import warnings
from typing import Callable, Literal
import numpy as np
from gufe import SmallMoleculeComponent
from openff.toolkit import Molecule as OFFMol
from openff.toolkit.utils.base_wrapper import ToolkitWrapper
from openff.toolkit.utils.toolkit_registry import ToolkitRegistry
from openff.toolkit.utils.toolkits import (
AmberToolsToolkitWrapper,
OpenEyeToolkitWrapper,
RDKitToolkitWrapper,
)
from openff.units import unit
from threadpoolctl import threadpool_limits
# Optional dependency: only record whether the ``openeye`` package is
# importable.
try:
    import openeye
except ImportError:
    HAS_OPENEYE = False
else:
    HAS_OPENEYE = True

# Import the registry context manager under its public name when it is
# available, otherwise fall back to the private name.
try:
    from openff.toolkit.utils.toolkit_registry import (
        toolkit_registry_manager,
    )
except ImportError:
    # toolkit_registry_manager was made non private in 0.14.4
    from openff.toolkit.utils.toolkit_registry import (
        _toolkit_registry_manager as toolkit_registry_manager,
    )

# Optional dependency: NAGL model lookup helpers and the NAGL toolkit wrapper.
# ``HAS_NAGL`` records whether all of them could be imported.
try:
    from openff.nagl_models import (
        get_models_by_type,
        validate_nagl_model_path,
    )
    from openff.toolkit.utils.nagl_wrapper import NAGLToolkitWrapper
except ImportError:
    HAS_NAGL = False
else:
    HAS_NAGL = True

# Optional dependency: the espaloma_charge toolkit wrapper.
try:
    from espaloma_charge.openff_wrapper import EspalomaChargeToolkitWrapper
except ImportError:
    HAS_ESPALOMA_CHARGE = False
else:
    HAS_ESPALOMA_CHARGE = True
# Dictionary of lists for the various backend options we allow.
# The values are toolkit wrapper *classes*, not instances.
# Note: can't create the classes ahead of time in case we end
# up with a case where the tool is not available, e.g. if OpenEye tk
# is not installed.
BACKEND_OPTIONS: dict[str, list[type[ToolkitWrapper]]] = {
    "ambertools": [RDKitToolkitWrapper, AmberToolsToolkitWrapper],
    "openeye": [OpenEyeToolkitWrapper],
    "rdkit": [RDKitToolkitWrapper],
}
def assign_offmol_espaloma_charges(offmol: OFFMol, toolkit_registry: ToolkitRegistry) -> None:
    """
    Assign Espaloma charges using the OpenFF toolkit.

    The charges are computed on a conformer-free copy of ``offmol`` and
    then copied back onto ``offmol.partial_charges``.

    Parameters
    ----------
    offmol : openff.toolkit.Molecule
        OpenFF molecule to assign Espaloma partial charges for.
    toolkit_registry : ToolkitRegistry
        Toolkit registry to use for assigning partial charges.
        This strictly limits available toolkit wrappers by
        overwriting the global registry during the partial charge
        assignment stage.

    Raises
    ------
    ImportError
        If the ``espaloma_charge`` toolkit wrapper could not be imported.

    Warns
    -----
    RuntimeWarning
        On every call, as espaloma charge assignment is not well tested.
    """
    if not HAS_ESPALOMA_CHARGE:
        errmsg = "The Espaloma ToolkitWrapper is not available, please install espaloma_charge"
        raise ImportError(errmsg)

    warnings.warn("Using espaloma to assign charges is not well tested", category=RuntimeWarning)

    # make a copy to remove conformers as espaloma enforces
    # a 0 conformer check
    offmol_copy = copy.deepcopy(offmol)
    offmol_copy._conformers = None

    # We are being overly cautious by applying the manager here
    # this is to avoid issues like:
    # https://github.com/openforcefield/openff-nagl/issues/69
    with toolkit_registry_manager(toolkit_registry):
        offmol_copy.assign_partial_charges(
            partial_charge_method="espaloma-am1bcc",
            toolkit_registry=EspalomaChargeToolkitWrapper(),
        )

    # Copy back charges into the original offmol object
    offmol.partial_charges = offmol_copy.partial_charges
def assign_offmol_nagl_charges(
    offmol: OFFMol,
    toolkit_registry: ToolkitRegistry,
    nagl_model: str | None = None,
) -> None:
    """
    Assign NAGL charges using the OpenFF toolkit.

    Parameters
    ----------
    offmol : openff.toolkit.Molecule
        OpenFF molecule to assign NAGL partial charges for.
    toolkit_registry : ToolkitRegistry
        Toolkit registry to use for assigning partial charges.
        This strictly limits available toolkit wrappers by
        overwriting the global registry during the partial charge
        assignment stage.
    nagl_model : str | None
        The NAGL model to use when assigning partial charges.
        If ``None``, will fetch the latest production "am1bcc" model.

    Raises
    ------
    ImportError
        If the NAGL toolkit wrapper could not be imported.
    ValueError
        If ``nagl_model`` is ``None`` and no production "am1bcc" model
        is found.
    """
    if not HAS_NAGL:
        errmsg = (
            "The NAGL toolkit is not available, you may "
            "be using an older version of the OpenFF "
            "toolkit - you need v0.14.4 or above"
        )
        raise ImportError(errmsg)

    if nagl_model is None:
        prod_models = get_models_by_type(model_type="am1bcc", production_only=True)
        try:
            # The last entry of the returned models is used as the default.
            nagl_model = prod_models[-1]
        except IndexError:
            errmsg = (
                "No production am1bcc NAGL models were found, "
                "please manually select a candidate release model."
            )
            # The IndexError is an implementation detail; suppress it so
            # users only see the actionable ValueError.
            raise ValueError(errmsg) from None

    model_path = validate_nagl_model_path(nagl_model)

    # We are being overly cautious by applying the manager here
    # this is to avoid issues like:
    # https://github.com/openforcefield/openff-nagl/issues/69
    with toolkit_registry_manager(toolkit_registry):
        offmol.assign_partial_charges(
            partial_charge_method=model_path,
            toolkit_registry=NAGLToolkitWrapper(),
        )
def assign_offmol_am1bcc_charges(
    offmol: OFFMol,
    partial_charge_method: Literal["am1bcc", "am1bccelf10"],
    toolkit_registry: ToolkitRegistry,
) -> None:
    """
    Assign AM1BCC partial charges to a molecule via the OpenFF toolkit.

    The molecule's own conformers are used for the charge calculation.

    Parameters
    ----------
    offmol : openff.toolkit.Molecule
        OpenFF Molecule to assign AM1BCC charges for.
        Must already have a conformer.
    partial_charge_method : Literal['am1bcc', 'am1bccelf10']
        The partial charge method to employ.
        Options include `am1bcc`, `am1bccelf10`.
    toolkit_registry : ToolkitRegistry
        Toolkit registry to use for assigning partial charges.
        This strictly limits available toolkit wrappers by
        overwriting the global registry during the partial charge
        assignment stage.

    Raises
    ------
    ValueError
        If the ``offmol`` does not have any conformers.
    """
    # Guard clause: nothing can be computed without a conformer.
    if offmol.n_conformers == 0:
        raise ValueError("method expects at least one conformer")

    # The registry is both installed through the manager and passed
    # explicitly - deliberately redundant, to avoid issues like:
    # https://github.com/openforcefield/openff-nagl/issues/69
    with toolkit_registry_manager(toolkit_registry):
        offmol.assign_partial_charges(
            toolkit_registry=toolkit_registry,
            use_conformers=offmol.conformers,
            partial_charge_method=partial_charge_method,
        )
def _generate_offmol_conformers(
    offmol: OFFMol,
    max_conf: int,
    toolkit_registry: ToolkitRegistry,
    generate_n_conformers: int | None,
) -> None:
    """
    Helper method for OFF Molecule conformer generation in charge assignment.

    Parameters
    ----------
    offmol : openff.toolkit.Molecule
      OpenFF Molecule to generate conformers for
    max_conf : int
      The maximum number of conformers supported by requested charge method.
    toolkit_registry : ToolkitRegistry
      Toolkit registry to use for generating conformers.
      This strictly limits available toolkit wrappers by
      overwriting the global registry during the conformer generation step.
    generate_n_conformers : int | None
      The number of conformers to generate. If ``None``, the existing
      conformers are retained & used.

    Raises
    ------
    ValueError
      If the ``generate_n_conformers`` is ``None`` and there are either
      no conformers or more than ``max_conf`` conformers associated with
      the input ``offmol``.
      If ``generate_n_conformers`` is greater than the value of ``max_conf``.
    """
    # No generation requested: only validate the existing conformers
    if generate_n_conformers is None:
        if offmol.n_conformers == 0:
            errmsg = (
                "No conformers are associated with input OpenFF "
                "Molecule. Need at least one for partial charge "
                "assignment"
            )
            raise ValueError(errmsg)

        if offmol.n_conformers > max_conf:
            errmsg = (
                "OpenFF Molecule has too many conformers: "
                f"{offmol.n_conformers}, selected partial charge "
                f"method can only support a maximum of {max_conf} "
                "conformers."
            )
            raise ValueError(errmsg)

        return

    # Check that generate_n_conformers <= max_conf
    if generate_n_conformers > max_conf:
        # Every fragment that interpolates a value must be an f-string,
        # otherwise the placeholder is emitted literally.
        errmsg = (
            f"{generate_n_conformers} conformers were requested "
            "for partial charge generation, but the selected "
            f"method only supports up to {max_conf} conformers."
        )
        raise ValueError(errmsg)

    # Generate conformers
    # OpenEye tk needs cis carboxylic acids
    make_carbox_cis = any(
        isinstance(i, OpenEyeToolkitWrapper) for i in toolkit_registry.registered_toolkits
    )

    # We are being overly cautious by both passing the
    # registry and applying the manager here - this is
    # to avoid issues like:
    # https://github.com/openforcefield/openff-nagl/issues/69
    with toolkit_registry_manager(toolkit_registry):
        offmol.generate_conformers(
            n_conformers=generate_n_conformers,
            rms_cutoff=0.25 * unit.angstrom,
            make_carboxylic_acids_cis=make_carbox_cis,
            toolkit_registry=toolkit_registry,
        )
def assign_offmol_partial_charges(
    offmol: OFFMol,
    overwrite: bool,
    method: Literal["am1bcc", "am1bccelf10", "nagl", "espaloma"],
    toolkit_backend: Literal["ambertools", "openeye", "rdkit"],
    generate_n_conformers: int | None,
    nagl_model: str | None,
) -> OFFMol:
    """
    Assign partial charges to an OpenFF Molecule based on a selected method.

    Conformer generation and charge assignment are carried out on a deep
    copy of the input molecule; only the resulting partial charges are
    copied back onto ``offmol``.

    Parameters
    ----------
    offmol : openff.toolkit.Molecule
      The Molecule to assign partial charges to.
    overwrite : bool
      Whether or not to overwrite any existing non-zero partial charges.
      Note that zeroed charges will always be overwritten.
    method : Literal['am1bcc', 'am1bccelf10', 'nagl', 'espaloma']
      Partial charge assignment method (matched case-insensitively).
      Supported methods include; am1bcc, am1bccelf10, nagl, and espaloma.
    toolkit_backend : Literal['ambertools', 'openeye', 'rdkit']
      OpenFF toolkit backend employed for charge generation
      (matched case-insensitively).
      Supported options:
        * ``ambertools``: selects both the AmberTools and RDKit Toolkit Wrapper
        * ``openeye``: selects the OpenEye toolkit Wrapper
        * ``rdkit``: selects the RDKit toolkit Wrapper
      Note that the ``rdkit`` backend cannot be used for `am1bcc` or
      ``am1bccelf10`` partial charge methods.
    generate_n_conformers : int | None
      Number of conformers to generate for partial charge generation.
      If ``None``, the input conformer will be used.
      Values greater than 1 can only be used alongside ``am1bccelf10``.
    nagl_model : str | None
      The NAGL model to use for charge assignment if method is ``nagl``.
      If ``None``, the latest am1bcc NAGL charge model is used.

    Raises
    ------
    ValueError
      If the ``method`` is unknown.
      If the ``toolkit_backend`` is not supported by the selected ``method``.
      If OpenEye is installed and ``nagl`` is requested with a non-OpenEye
      backend.
      If ``generate_n_conformers`` is ``None``, but the input ``offmol``
      has no associated conformers.
      If the number of conformers passed or generated exceeds the number
      of conformers selected by the partial charge ``method``.
    ImportError
      If the ``openeye`` backend is requested but OpenEye is unavailable.

    Returns
    -------
    The Molecule with partial charges assigned.
    """
    # Existing non-zero charges are kept untouched unless overwriting
    current_charges = offmol.partial_charges
    has_nonzero_charges = current_charges is not None and np.any(current_charges)
    if has_nonzero_charges and not overwrite:
        return offmol

    # Registry of the available charge methods. Each entry swaps in the
    # callables and limits relevant to that method:
    #   * `confgen_func`: the conformer generation method
    #   * `charge_func`: the partial charge assignment method
    #   * `backends`: the allowed backends for the method
    #   * `max_conf`: maximum number of allowed conformations for the method
    #   * `charge_extra_kwargs`: any additional kwargs to be passed to the
    #     partial charge assignment method beyond the input offmol and
    #     the toolkit registry
    CHARGE_METHODS = {
        "am1bcc": {
            "confgen_func": _generate_offmol_conformers,
            "charge_func": assign_offmol_am1bcc_charges,
            "backends": ["ambertools", "openeye"],
            "max_conf": 1,
            "charge_extra_kwargs": {"partial_charge_method": "am1bcc"},
        },
        "am1bccelf10": {
            "confgen_func": _generate_offmol_conformers,
            "charge_func": assign_offmol_am1bcc_charges,
            "backends": ["openeye"],
            "max_conf": sys.maxsize,
            "charge_extra_kwargs": {"partial_charge_method": "am1bccelf10"},
        },
        "nagl": {
            "confgen_func": _generate_offmol_conformers,
            "charge_func": assign_offmol_nagl_charges,
            "backends": ["openeye", "rdkit", "ambertools"],
            "max_conf": 1,
            "charge_extra_kwargs": {"nagl_model": nagl_model},
        },
        "espaloma": {
            "confgen_func": _generate_offmol_conformers,
            "charge_func": assign_offmol_espaloma_charges,
            "backends": ["rdkit", "ambertools"],
            "max_conf": 1,
            "charge_extra_kwargs": {},
        },
    }

    # Resolve the method specification once; this also validates `method`
    method_key = method.lower()
    try:
        method_spec = CHARGE_METHODS[method_key]
    except KeyError:
        errmsg = f"Unknown partial charge method {method}"
        raise ValueError(errmsg)

    backends = method_spec["backends"]
    backend_key = toolkit_backend.lower()

    # The method must actually support the requested toolkit backend
    if backend_key not in backends:  # type: ignore
        raise ValueError(
            f"Selected toolkit_backend ({toolkit_backend}) cannot "
            f"be used with the selected method ({method}). "
            f"Available backends are: {backends}"
        )

    # OpenEye is the only optional dependency in the toolkit backends
    if backend_key == "openeye" and not HAS_OPENEYE:
        raise ImportError("OpenEye is not available and cannot be selected as a backend")

    # Issue 1760
    if HAS_OPENEYE and method_key == "nagl" and backend_key != "openeye":
        raise ValueError(
            "OpenEye toolkit is installed but not used in the OpenFF toolkit registry backend. This is not possible with NAGL charges."
        )

    toolkits = ToolkitRegistry([wrapper() for wrapper in BACKEND_OPTIONS[backend_key]])

    # Work on a copy since the conformers are going to be modified
    working_mol = copy.deepcopy(offmol)

    # Generate conformers - note this callable may differ based on the
    # partial charge method employed
    method_spec["confgen_func"](  # type: ignore
        offmol=working_mol,
        max_conf=method_spec["max_conf"],
        toolkit_registry=toolkits,
        generate_n_conformers=generate_n_conformers,
    )

    # limit the number of threads used by SQM
    with threadpool_limits(limits=1):
        # Call selected method to assign partial charges
        method_spec["charge_func"](  # type: ignore
            offmol=working_mol,
            toolkit_registry=toolkits,
            **method_spec["charge_extra_kwargs"],
        )

    # Only the charges travel back to the caller's molecule
    offmol.partial_charges = working_mol.partial_charges
    return offmol
def bulk_assign_partial_charges(
    molecules: list[SmallMoleculeComponent],
    overwrite: bool,
    method: Literal["am1bcc", "am1bccelf10", "nagl", "espaloma"],
    toolkit_backend: Literal["ambertools", "openeye", "rdkit"],
    generate_n_conformers: int | None,
    nagl_model: str | None,
    processors: int = 1,
) -> list[SmallMoleculeComponent]:
    """
    Assign partial charges to a list of SmallMoleculeComponents using multiprocessing.

    Parameters
    ----------
    molecules : list[gufe.SmallMoleculeComponent]
      The list of molecules who should have partial charges assigned.
    overwrite : bool
      Whether or not to overwrite any existing non-zero partial charges.
      Note that zeroed charges will always be overwritten.
    method : Literal['am1bcc', 'am1bccelf10', 'nagl', 'espaloma']
      Partial charge assignment method.
      Supported methods include; am1bcc, am1bccelf10, nagl, and espaloma.
    toolkit_backend : Literal['ambertools', 'openeye', 'rdkit']
      OpenFF toolkit backend employed for charge generation.
      Supported options:
        * ``ambertools``: selects both the AmberTools and RDKit Toolkit Wrapper
        * ``openeye``: selects the OpenEye toolkit Wrapper
        * ``rdkit``: selects the RDKit toolkit Wrapper
      Note that the ``rdkit`` backend cannot be used for `am1bcc` or
      ``am1bccelf10`` partial charge methods.
    generate_n_conformers : int | None
      Number of conformers to generate for partial charge generation.
      If ``None``, the input conformer will be used.
      Values greater than 1 can only be used alongside ``am1bccelf10``.
    nagl_model : str | None
      The NAGL model to use for charge assignment if method is ``nagl``.
      If ``None``, the latest am1bcc NAGL charge model is used.
    processors: int, default 1
      The number of processors which should be used to generate the charges.
      Values greater than 1 dispatch the work to a process pool.

    Raises
    ------
    ValueError
      If the ``toolkit_backend`` is not supported by the selected ``method``.
      If ``generate_n_conformers`` is ``None``, but the input ``offmol``
      has no associated conformers.
      If the number of conformers passed or generated exceeds the number
      of conformers selected by the partial charge ``method``.

    Returns
    -------
    A list of SmallMoleculeComponents with the charges assigned, in the
    same order as the input ``molecules`` (for both the serial and the
    multiprocessing code paths).
    """
    import tqdm

    charge_keywords = {
        "overwrite": overwrite,
        "method": method,
        "toolkit_backend": toolkit_backend,
        "generate_n_conformers": generate_n_conformers,
        "nagl_model": nagl_model,
    }

    if processors > 1:
        from concurrent.futures import ProcessPoolExecutor, as_completed

        with ProcessPoolExecutor(max_workers=processors) as pool:
            work_list = [
                pool.submit(
                    assign_offmol_partial_charges,
                    m.to_openff(),
                    **charge_keywords,  # type: ignore
                )
                for m in molecules
            ]

            # as_completed only drives the progress bar; calling result()
            # here re-raises a worker failure as soon as it is reported.
            for work in tqdm.tqdm(
                as_completed(work_list), desc="Generating charges", ncols=80, total=len(molecules)
            ):
                work.result()

        # Collect from the submission-ordered list so the output order
        # matches the input order rather than the completion order.
        return [SmallMoleculeComponent.from_openff(work.result()) for work in work_list]

    charged_ligands = []
    for m in tqdm.tqdm(molecules, desc="Generating charges", ncols=80, total=len(molecules)):
        mol_with_charge = assign_offmol_partial_charges(m.to_openff(), **charge_keywords)  # type: ignore
        charged_ligands.append(SmallMoleculeComponent.from_openff(mol_with_charge))

    return charged_ligands
================================================
FILE: src/openfe/protocols/openmm_utils/mdtraj_utils.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
import mdtraj as mdt
import numpy as np
import openmm
from openmm import unit as omm_unit
def mdtraj_from_openmm(
    omm_topology: openmm.app.Topology,
    omm_positions: openmm.unit.Quantity,
):
    """
    Get an mdtraj object from an OpenMM topology and positions.

    Positions and periodic box vectors are both converted explicitly to
    nanometers before being handed to mdtraj.

    Parameters
    ----------
    omm_topology : openmm.app.Topology
      The OpenMM topology
    omm_positions : openmm.unit.Quantity
      The OpenMM positions

    Returns
    -------
    mdtraj_trajectory : mdtraj.Trajectory
      Trajectory built from the positions and topology. Unit cell lengths
      and angles (in degrees) are set when the topology defines periodic
      box vectors, and are ``None`` otherwise.
    """
    mdtraj_topology = mdt.Topology.from_openmm(omm_topology)
    positions_in_mdtraj_format = omm_positions.value_in_unit(omm_unit.nanometers)

    box = omm_topology.getPeriodicBoxVectors()
    if box is not None:
        # Convert through the public unit API rather than reading the
        # private ``_value`` attribute, so box vectors stored in another
        # length unit are not misread as nanometers.
        x, y, z = [np.array(b.value_in_unit(omm_unit.nanometers)) for b in box]
        lx = np.linalg.norm(x)
        ly = np.linalg.norm(y)
        lz = np.linalg.norm(z)

        # Clip the cosines so floating point round-off cannot push them
        # outside of arccos' domain and yield NaN angles.
        # angle between y and z
        alpha = np.arccos(np.clip(np.dot(y, z) / (ly * lz), -1.0, 1.0))
        # angle between x and z
        beta = np.arccos(np.clip(np.dot(x, z) / (lx * lz), -1.0, 1.0))
        # angle between x and y
        gamma = np.arccos(np.clip(np.dot(x, y) / (lx * ly), -1.0, 1.0))

        unitcell_lengths = np.array([lx, ly, lz])
        unitcell_angles = np.array([np.rad2deg(alpha), np.rad2deg(beta), np.rad2deg(gamma)])
    else:
        unitcell_lengths = None
        unitcell_angles = None

    mdtraj_trajectory = mdt.Trajectory(
        positions_in_mdtraj_format,
        mdtraj_topology,
        unitcell_lengths=unitcell_lengths,
        unitcell_angles=unitcell_angles,
    )

    return mdtraj_trajectory
================================================
FILE: src/openfe/protocols/openmm_utils/multistate_analysis.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""
Reusable utility methods to analyze results from multistate calculations.
"""
import warnings
from pathlib import Path
from typing import Optional, Union
import matplotlib.pyplot as plt
import numpy as np
import numpy.typing as npt
from openff.units import Quantity, unit
from openff.units.openmm import from_openmm
from openmmtools import multistate
from openfe.analysis import plotting
from openfe.due import Doi, due
# Register the citations associated with this module through duecredit.
# Each entry is attached to the module path (``cite_module=True``).

# OpenMMTools software
due.cite(
    Doi("10.5281/zenodo.596622"),
    description="OpenMMTools",
    path="openfe.protocols.openmm_utils.multistate_analysis",
    cite_module=True,
)
# MBAR method
due.cite(
    Doi("10.1063/1.2978177"),
    description="MBAR paper",
    path="openfe.protocols.openmm_utils.multistate_analysis",
    cite_module=True,
)
# Timeseries algorithms used alongside MBAR
due.cite(
    Doi("10.1021/ct0502864"),
    description="MBAR timeseries algorithms",
    path="openfe.protocols.openmm_utils.multistate_analysis",
    cite_module=True,
)
# Automatic equilibration detection
due.cite(
    Doi("10.1021/acs.jctc.5b00784"),
    description="Automatic equilibration detection method",
    path="openfe.protocols.openmm_utils.multistate_analysis",
    cite_module=True,
)
# pyMBAR software
due.cite(
    Doi("10.5281/zenodo.596220"),
    description="pyMBAR zenodo",
    path="openfe.protocols.openmm_utils.multistate_analysis",
    cite_module=True,
)
class MultistateEquilFEAnalysis:
    """
    Run and plot the analyses needed for a free energy calculation carried
    out with a :class:`openmmtools.MultiStateSampler`.

    All analyses are executed once at construction time and exposed through
    read-only properties.

    Currently implemented analyses are:
      - Decorrelated MBAR analysis of free energies (and associated errors)
      - Number of equilibration & sampling steps
      - MBAR overlap matrices
      - Replica lambda traversal (exchange matrix and timeseries)
      - Forward and reverse analysis of free energies

    Parameters
    ----------
    reporter : openmmtools.MultiStateReporter
      Reporter for the MultiStateSampler
    sampling_method : str
      The sampling method. Expected values are `repex`, `sams`,
      and `independent`. Other values only trigger a warning.
    result_units : openff.units.Quantity
      Units to report results in.
    forward_reverse_samples : int
      The number of samples to use in the forward and reverse analysis
      of the free energies. Default 10.
    """

    def __init__(
        self,
        reporter: multistate.MultiStateReporter,
        sampling_method: str,
        result_units: Quantity,
        forward_reverse_samples: int = 10,
    ):
        self.analyzer = multistate.MultiStateSamplerAnalyzer(reporter)
        self.units = result_units

        # The sampling method is stored lower-cased; unknown values are
        # tolerated but flagged to the user.
        normalised_method = sampling_method.lower()
        if normalised_method not in ("repex", "sams", "independent"):
            warnings.warn(f"Unknown sampling method {sampling_method}")
        self.sampling_method = normalised_method

        # Do a first pass at the analysis
        self._analyze(forward_reverse_samples)

    def plot(self, filepath: Path, filename_prefix: str):
        """
        Write out figures for the free energy analyses.

        Specifically the following plots are generated:
          * The free energy overlap matrix
          * The replica exchange overlap matrix (if sampler_method is repex)
          * The timeseries of replica states over time
          * The forward and reverse estimate of the free energies
            (skipped when that analysis is unavailable)

        Parameters
        ----------
        filepath : pathlib.Path
          The path to where files should be written.
        filename_prefix : str
          A prefix for the written filenames.
        """

        def _title_save_close(axes, title: str, file_suffix: str) -> None:
            # Title the axes, write the figure to disk and release it
            axes.set_title(title)
            axes.figure.savefig(  # type: ignore
                filepath / (filename_prefix + file_suffix)
            )
            plt.close(axes.figure)  # type: ignore

        # MBAR overlap matrix
        _title_save_close(
            plotting.plot_lambda_transition_matrix(self.free_energy_overlaps["matrix"]),
            "MBAR overlap matrix",
            "mbar_overlap_matrix.png",
        )

        # Reverse and forward analysis
        forward_reverse = self.forward_and_reverse_free_energies
        if forward_reverse is not None:
            _title_save_close(
                plotting.plot_convergence(forward_reverse, self.units),
                "Forward and Reverse free energy convergence",
                "forward_reverse_convergence.png",
            )

        # Replica state timeseries plot
        _title_save_close(
            plotting.plot_replica_timeseries(self.replica_states, self.equilibration_iterations),
            "Change in replica state over time",
            "replica_state_timeseries.png",
        )

        # Replica exchange transition matrix
        if self.sampling_method == "repex":
            _title_save_close(
                plotting.plot_lambda_transition_matrix(self.replica_exchange_statistics["matrix"]),
                "Replica exchange transition matrix",
                "replica_exchange_matrix.png",
            )

    def _analyze(self, forward_reverse_samples: int):
        """
        Run the following analyses and cache their results on the instance:
          * MBAR free energy difference between end states using
            post-equilibration decorrelated samples of the energies.
          * Forward and reverse fractional analysis of free energies over
            the equilibrated & decorrelated data points.
          * MBAR estimate of the overlap matrix across states.
          * Replica exchange transition matrix
            (if sampler_method is ``repex``)

        Parameters
        ----------
        forward_reverse_samples : int
          Number of samples to take in the forward and reverse analysis of
          the free energies.
        """
        # Do things that get badly cached later
        raw_states = self.analyzer.reporter.read_replica_thermodynamic_states()
        # convert full masked array to simple array
        # downcast to int32, we don't have more than 4 billion states thankfully
        self._replica_states = np.asarray(raw_states, dtype=np.int32)

        # float conversions to avoid having to deal with numpy dtype serialization
        self._equil_iters = float(self.analyzer.n_equilibration_iterations)
        self._prod_iters = float(self.analyzer._equilibration_data[2])

        # Gather estimate of free energy
        free_energy, free_energy_err = self.get_equil_free_energy()
        self._free_energy = free_energy
        self._free_energy_err = free_energy_err

        # forward and reverse analysis
        self._forward_reverse = self.get_forward_and_reverse_analysis(forward_reverse_samples)

        # Gather overlap matrix
        self._overlap_matrix = self.get_overlap_matrix()

        # Gather exchange transition matrix
        # Note we only generate these for replica exchange calculations
        # TODO: consider if this would also work for SAMS
        if self.sampling_method == "repex":
            self._exchange_matrix = self.get_exchanges()

    @staticmethod
    def _get_free_energy(
        analyzer: multistate.MultiStateSamplerAnalyzer,
        u_ln: npt.NDArray,
        N_l: npt.NDArray,
        bootstraps: int = 1000,
        return_units: Quantity = unit.kilocalorie_per_mole,
    ) -> tuple[Quantity, Quantity]:
        """
        Helper method to create an MBAR object and extract free energies
        between end states.

        Parameters
        ----------
        analyzer : multistate.MultiStateSamplerAnalyzer
          MultiStateSamplerAnalyzer to extract free energies from.
          Its ``kT`` is used to scale the reduced free energies.
        u_ln : npt.NDArray
          A n_states x (n_sampled_states * n_iterations)
          array of energies (in kT).
        N_l : npt.NDArray
          An array containing the total number of samples drawn from each
          state.
        bootstraps : int
          How many bootstrap samples will be computed. If 0, no bootstraps
          will be computed and analytical errors will be returned.
        return_units : openff.units.Quantity
          The return units the results will be provided in.

        Returns
        -------
        DG : openff.units.Quantity
          The free energy difference between the first and last states.
        dDG : openff.units.Quantity
          The MBAR error estimate for the free energy difference; a
          bootstrap estimate when ``bootstraps`` is greater than 0.

        TODO
        ----
        * Allow folks to pass in extra options for bootstrapping etc..
        * Add standard test against analyzer.get_free_energy()
        """
        # pymbar has some side effects when imported so we only import it right when we
        # need it
        from pymbar import MBAR

        mbar = MBAR(
            u_ln,
            N_l,
            solver_protocol="robust",
            n_bootstraps=bootstraps,
            bootstrap_solver_protocol="robust",
        )

        uncertainty_method = "bootstrap" if bootstraps > 0 else None

        differences = mbar.compute_free_energy_differences(
            compute_uncertainty=True,
            uncertainty_method=uncertainty_method,
        )

        # Only the difference between the two end states is of interest
        DG = differences["Delta_f"][0, -1] * analyzer.kT
        dDG = differences["dDelta_f"][0, -1] * analyzer.kT

        return (from_openmm(DG).to(return_units), from_openmm(dDG).to(return_units))

    def get_equil_free_energy(self) -> tuple[Quantity, Quantity]:
        """
        Extract unbiased and uncorrelated estimates of the free energy
        and the associated error from a MultiStateSamplerAnalyzer object.

        Returns
        -------
        DG : openff.units.Quantity
          The free energy difference between the end states.
        dDG : openff.units.Quantity
          The MBAR error for the free energy difference estimate
          (requested with 1000 bootstraps).
        """
        return self._get_free_energy(
            self.analyzer,
            self.analyzer._unbiased_decorrelated_u_ln,
            self.analyzer._unbiased_decorrelated_N_l,
            1000,
            self.units,
        )

    def get_forward_and_reverse_analysis(
        self, num_samples: int = 10
    ) -> Optional[dict[str, Union[npt.NDArray, Quantity]]]:
        """
        Calculate free energies with a progressively larger
        fraction of the decorrelated timeseries data in both
        the forward and reverse direction.

        Parameters
        ----------
        num_samples : int
          The number data points to sample.

        Returns
        -------
        forward_reverse : Optional[dict[str, Union[npt.NDArray, openff.units.Quantity]]]
          If this analysis fails, returns None; otherwise returns a dictionary
          containing;
            * ``fractions``: fractions of sample used to calculate free energies
            * ``forward_DGs`` and `forward_dDGs`: the free energy estimates
              and errors along each sample fraction in the forward direction
            * ``reverse_DGs`` and `reverse_dDGs`: the free energy estimates
              and errors along each sample fraction in the reverse direction

        Raises
        ------
        ValueError
          If the number of samples differs between states.

        Notes
        -----
        * This method does not currently use bootstrap uncertainties due to
          issues with the solver when using low amounts of data points. All
          uncertainties are MBAR analytical errors.
        """
        # pymbar has some side effects from being imported, so we only want to import
        # it right when we need it
        from pymbar.utils import ParameterError

        try:
            u_ln = self.analyzer._unbiased_decorrelated_u_ln
            N_l = self.analyzer._unbiased_decorrelated_N_l
            n_states = len(N_l)

            # Check that the N_l is the same across all states
            if not np.all(N_l == N_l[0]):
                errmsg = f"The number of samples is not equivalent across all states {N_l}"
                raise ValueError(errmsg)

            # Get the chunks of N_l going from 10% to ~ 100%
            # Note: you always lose out a few data points but it's fine
            chunks = [max(int(N_l[0] / num_samples * i), 1) for i in range(1, num_samples + 1)]

            forward_DGs = []
            forward_dDGs = []
            reverse_DGs = []
            reverse_dDGs = []
            fractions = []

            for chunk in chunks:
                chunk_N_l = np.array([chunk] * n_states)
                n_columns = chunk * n_states

                # Leading columns give the forward estimate, trailing
                # columns the reverse one (always evaluated in that order)
                directions = (
                    (u_ln[:, :n_columns], forward_DGs, forward_dDGs),
                    (u_ln[:, -n_columns:], reverse_DGs, reverse_dDGs),
                )
                for energies, estimates, errors in directions:
                    DG, dDG = self._get_free_energy(
                        self.analyzer,
                        energies,
                        chunk_N_l,
                        0,
                        self.units,
                    )
                    estimates.append(DG)
                    errors.append(dDG)

                fractions.append(chunk / N_l[0])

        except ParameterError:
            return None

        return {
            "fractions": np.array(fractions),
            "forward_DGs": Quantity.from_list(forward_DGs),  # type: ignore
            "forward_dDGs": Quantity.from_list(forward_dDGs),  # type: ignore
            "reverse_DGs": Quantity.from_list(reverse_DGs),  # type: ignore
            "reverse_dDGs": Quantity.from_list(reverse_dDGs),  # type: ignore
        }

    def get_overlap_matrix(self) -> dict[str, npt.NDArray]:
        """
        Generate an overlap matrix across lambda states.

        Returns
        -------
        overlap_matrix : dict[str, npt.NDArray]
          A dictionary containing the following keys:
            * ``scalar``: One minus the largest nontrivial eigenvalue
            * ``eigenvalues``: The sorted (descending) eigenvalues of the
              overlap matrix
            * ``matrix``: Estimated overlap matrix of observing a sample from
              state i in state j
        """
        return self.analyzer.mbar.compute_overlap()

    def get_exchanges(self) -> dict[str, npt.NDArray]:
        """
        Gather both the transition matrix (and relevant eigenvalues) between
        replicas.

        Returns
        -------
        transition_matrix : dict[str, npt.NDArray]
          A dictionary containing the following:
            * ``eigenvalues``: The sorted (descending) eigenvalues of the
              lambda state transition matrix
            * ``matrix``: The transition matrix estimate of a replica switching
              from state i to state j.
        """
        # Get replica mixing statistics
        stats = self.analyzer.generate_mixing_statistics()
        return {
            "eigenvalues": stats.eigenvalues,
            "matrix": stats.transition_matrix,
        }

    @property
    def replica_states(self):
        """
        Timeseries of states for each replica (stored as an int32 array).
        """
        return self._replica_states

    @property
    def equilibration_iterations(self):
        """
        Number of iterations discarded as equilibration.
        """
        return self._equil_iters

    @property
    def production_iterations(self):
        """
        Number of production iterations from which energies are sampled.
        """
        return self._prod_iters

    @property
    def free_energy(self):
        """
        The free energy estimate from decorrelated unbiased samples.
        """
        return self._free_energy

    @property
    def free_energy_error(self):
        """
        The MBAR error estimate associated with the free energy estimate.
        """
        return self._free_energy_err

    @property
    def forward_and_reverse_free_energies(self):
        """
        The dictionary of the forward and reverse analysis of the free
        energies, using the number of samples defined at class
        initialization. ``None`` if that analysis failed.
        """
        return self._forward_reverse

    @property
    def free_energy_overlaps(self):
        """
        A dictionary containing the estimated overlap matrix and corresponding
        eigenvalues and scalars of the free energies.
        """
        return self._overlap_matrix

    @property
    def replica_exchange_statistics(self):
        """
        A dictionary containing the estimated replica exchange matrix
        and corresponding eigenvalues.

        Raises
        ------
        ValueError
          If no exchange matrix was generated during the analysis.
        """
        if not hasattr(self, "_exchange_matrix"):
            errmsg = (
                "Exchange matrix was not generated, this is likely "
                f"{self.sampling_method} is not repex."
            )
            raise ValueError(errmsg)

        return self._exchange_matrix

    @property
    def unit_results_dict(self):
        """
        Dictionary gathering the analysis results.

        The ``replica_exchange_statistics`` entry is only included when an
        exchange matrix was generated.
        """
        results_dict = {
            "unit_estimate": self.free_energy,
            "unit_estimate_error": self.free_energy_error,
            "unit_mbar_overlap": self.free_energy_overlaps,
            "forward_and_reverse_energies": self.forward_and_reverse_free_energies,
            "production_iterations": self.production_iterations,
            "equilibration_iterations": self.equilibration_iterations,
        }

        if hasattr(self, "_exchange_matrix"):
            results_dict["replica_exchange_statistics"] = self.replica_exchange_statistics

        return results_dict

    def close(self):
        """Clear the underlying analyzer."""
        self.analyzer.clear()
================================================
FILE: src/openfe/protocols/openmm_utils/omm_compute.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
# Adapted Perses' perses.app.setup_relative_calculation.get_openmm_platform
import logging
import os
import warnings
from typing import Optional
# Module-level logger, named after this module
logger = logging.getLogger(__name__)
def get_openmm_platform(
    platform_name: Optional[str] = None,
    gpu_device_index: Optional[list[int]] = None,
    restrict_cpu_count: bool = False,
):
    """
    Return OpenMM's platform object based on given name. Setting to mixed
    precision if using CUDA or OpenCL.

    Parameters
    ----------
    platform_name : Optional[str]
      String with the platform name. ``cpu``, ``opencl`` and ``cuda`` are
      matched case-insensitively; any other name is passed to OpenMM
      unchanged. If None, it will use the fastest platform supporting
      mixed precision.
      Default ``None``.
    gpu_device_index : Optional[list[int]]
      GPU device index selection. If ``None`` the default OpenMM
      GPU selection will be used. Only applied to the CUDA and OpenCL
      platforms.
      See the `OpenMM platform properties documentation <http://docs.openmm.org/latest/userguide/library/04_platform_specifics.html>`_
      for more details.
      Default ``None``.
    restrict_cpu_count : bool
      Optional hint to restrict the CPU thread count when the selected
      platform is CPU. The thread count is read from the
      ``OPENMM_CPU_THREADS`` environment variable, defaulting to 1.
      This allows Protocols to avoid large performance losses in cases
      like vacuum simulations.
      Default ``False``.

    Returns
    -------
    platform : openmm.Platform
      OpenMM platform object.
    """
    if platform_name is None:
        # No platform is specified, so retrieve fastest platform that supports
        # 'mixed' precision
        from openmmtools.utils import get_fastest_platform

        platform = get_fastest_platform(minimum_precision="mixed")
    else:
        from openmm import Platform

        # Map the common names onto OpenMM's spelling; unrecognised names
        # are forwarded as given.
        canonical_names = {
            "cpu": "CPU",
            "opencl": "OpenCL",
            "cuda": "CUDA",
        }
        platform_name = canonical_names.get(str(platform_name).lower(), platform_name)
        platform = Platform.getPlatformByName(platform_name)

    # Set precision and properties
    name = platform.getName()
    if name in ["CUDA", "OpenCL"]:
        platform.setPropertyDefaultValue("Precision", "mixed")
        if gpu_device_index is not None:
            index_list = ",".join(str(i) for i in gpu_device_index)
            platform.setPropertyDefaultValue("DeviceIndex", index_list)

    if name == "CUDA":
        platform.setPropertyDefaultValue("DeterministicForces", "true")

    if name != "CUDA":
        wmsg = (
            f"Non-CUDA platform selected: {name}, this may significantly "
            "impact simulation performance"
        )
        warnings.warn(wmsg)
        # Report through the module logger rather than the root logger so
        # the record carries this module's name and honours its settings.
        logger.warning(wmsg)

    if name == "CPU" and restrict_cpu_count:
        threads = os.getenv("OPENMM_CPU_THREADS", "1")
        platform.setPropertyDefaultValue("Threads", threads)

    return platform
================================================
FILE: src/openfe/protocols/openmm_utils/omm_settings.py
================================================
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe
"""Equilibrium Free Energy Protocols input settings.
This module implements base settings necessary to run
free energy calculations using OpenMM +/- Tools, such
as :mod:`openfe.protocols.openmm_rfe.equil_rfe_methods.py`
and :mod:`openfe.protocols.openmm_afe.equil_afe_methods.py`
"""
from typing import Annotated, Literal, Optional, TypeAlias
from gufe.settings import (
OpenMMSystemGeneratorFFSettings as OpenMMSystemGeneratorFFSettings,
)
from gufe.settings import (
Settings as Settings,
)
from gufe.settings import (
SettingsBaseModel,
)
from gufe.settings import (
ThermoSettings as ThermoSettings,
)
from gufe.settings.typing import (
BoxQuantity,
GufeQuantity,
KCalPerMolQuantity,
NanometerArrayQuantity,
NanometerQuantity,
NanosecondQuantity,
PicosecondQuantity,
specify_quantity_units,
)
from openff.interchange.components._packmol import _box_vectors_are_in_reduced_form
from openff.units import unit
from pydantic import ConfigDict, field_validator, model_validator
# Quantity type aliases: each annotates ``GufeQuantity`` with the units
# named in its ``specify_quantity_units`` call.

# Quantity specified in femtoseconds
FemtosecondQuantity: TypeAlias = Annotated[GufeQuantity, specify_quantity_units("femtosecond")]
# Quantity specified in inverse picoseconds
InversePicosecondQuantity: TypeAlias = Annotated[
    GufeQuantity, specify_quantity_units("1/picosecond")
]
# Quantity specified in timesteps
TimestepQuantity: TypeAlias = Annotated[GufeQuantity, specify_quantity_units("timestep")]
# Quantity specified in bar * nanometer
SurfaceTensionQuantity: TypeAlias = Annotated[GufeQuantity, specify_quantity_units("bar*nanometer")]
class BaseSolvationSettings(SettingsBaseModel):
    """
    Base class for SolvationSettings objects.

    Defines no settings fields of its own; it only sets the model
    configuration shared by its subclasses.
    """

    # Allow fields whose types are arbitrary (non-pydantic) classes
    model_config = ConfigDict(arbitrary_types_allowed=True)
class OpenMMSolvationSettings(BaseSolvationSettings):
    """Settings for controlling how a system is solvated using OpenMM tooling.

    Defining the number of waters
    -----------------------------

    The number of waters is controlled by either:
      a) defining a solvent padding (``solvent_padding``) in combination
         with a box shape
      b) defining the number of solvent molecules
         (``number_of_solvent_molecules``)
         alongside the box shape (``box_shape``)
      c) defining the box directly either through the box vectors
         (``box_vectors``) or rectangular box lengths (``box_size``)

    When using ``solvent_padding``, ``box_vectors``, or ``box_size``,
    the exact number of waters added is determined automatically by OpenMM
    through :meth:`openmm.app.Modeller.addSolvent` internal heuristics.
    Briefly, the necessary volume required by a single water is estimated
    and then the defined target cell is packed with waters avoiding clashes
    with existing solutes and box edges.


    Defining the periodic cell size
    -------------------------------

    The periodic cell size is defined by one, and only one, of the following:
      * ``solvent_padding`` in combination with ``box_shape``,
      * ``number_of_solvent_molecules`` in combination with ``box_shape``,
      * ``box_vectors``,
      * ``box_size``

    When using ``number_of_solvent_molecules``, the size of the cell is
    defined by :meth:`openmm.app.Modeller.addSolvent` internal heuristics,
    automatically selecting a padding value that is large enough to contain
    the number of waters based on a geometric estimate of the volume required
    by each water molecule.


    Defining the periodic cell shape
    --------------------------------

    The periodic cell shape is defined by one, and only one, of the following:
      * ``box_shape``,
      * ``box_vectors``,
      * ``box_size``

    Default settings will create a dodecahedral box
    (``box_shape="dodecahedron"``) with a ``solvent_padding`` of 1.5 nm.
    A ``cube`` can be requested instead, although more space efficient
    shapes (e.g. ``dodecahedron``) are recommended to improve simulation
    performance.


    Notes
    -----
    * The number of water molecules added will be affected by the number of
      ions defined in SolventComponent. For example, the value of
      ``number_of_solvent_molecules`` is the sum of the number of counterions
      added and the number of water molecules added.
    * Solvent addition does not account for any pre-existing waters explicitly
      defined in the :class:`openfe.ChemicalSystem`. Any waters will be added
      in addition to those pre-existing waters.
    * No solvation will happen if a SolventComponent is not passed.
    * The validators defined on this class check each field individually;
      the "one, and only one" combinations described above are not
      cross-checked by this class itself.


    See Also
    --------
    :class:`openmm.app.Modeller`
        OpenMM class whose ``addSolvent`` method is used to add the solvent.
    """

    solvent_model: Literal["tip3p", "spce", "tip4pew", "tip5p"] = "tip3p"
    """
    Force field water model to use when solvating and defining the model
    properties (e.g. adding virtual site particles).

    Allowed values are: ``tip3p``, ``spce``, ``tip4pew``, and ``tip5p``.
    """

    solvent_padding: NanometerQuantity | None = 1.5 * unit.nanometer
    """
    Minimum distance from any solute bounding sphere to the edge of the box.

    Note
    ----
    * Cannot be defined alongside ``number_of_solvent_molecules``,
      ``box_size``, or ``box_vectors``.
    """

    box_shape: Optional[Literal["cube", "dodecahedron", "octahedron"]] = "dodecahedron"
    """
    The shape of the periodic box to create.

    Notes
    -----
    * Must be one of ``cube``, ``dodecahedron``, or ``octahedron``.
    * Cannot be defined alongside ``box_vectors`` or ``box_size``.
    """

    number_of_solvent_molecules: Optional[int] = None
    """
    The number of solvent molecules (water + ions) to add.

    Note
    ----
    * Cannot be defined alongside ``solvent_padding``, ``box_size``,
      or ``box_vectors``.
    """

    box_vectors: BoxQuantity | None = None
    """
    `OpenMM reduced form box vectors <http://docs.openmm.org/latest/userguide/theory/05_other_features.html#periodic-boundary-conditions>`_.

    Notes
    -----
    * Cannot be defined alongside ``solvent_padding``,
      ``number_of_solvent_molecules``, or ``box_size``.

    See Also
    --------
    :mod:`openff.interchange.components.interchange`
    :mod:`openff.interchange.components._packmol`
    """

    box_size: NanometerArrayQuantity | None = None  # TODO: make this a better check!
    """
    X, Y, and Z lengths of the unit cell for a rectangular box.

    Notes
    -----
    * Cannot be defined alongside ``solvent_padding``,
      ``number_of_solvent_molecules``, or ``box_vectors``.
    """

    @field_validator("box_vectors")
    @classmethod
    def supported_vectors(cls, v):
        """Reject box vectors that are not in OpenMM reduced form.

        Parameters
        ----------
        v
            The ``box_vectors`` value, or ``None`` when unset.

        Returns
        -------
        The value unchanged.

        Raises
        ------
        ValueError
            If ``v`` is set and ``_box_vectors_are_in_reduced_form`` reports
            that it is not in reduced form.
        """
        if v is not None and not _box_vectors_are_in_reduced_form(v):
            errmsg = f"box_vectors: {v} are not in OpenMM reduced form"
            raise ValueError(errmsg)
        return v

    @field_validator("solvent_padding")
    @classmethod
    def is_positive_distance(cls, v):
        """Check that ``solvent_padding`` is a non-negative distance.

        Parameters
        ----------
        v
            The ``solvent_padding`` value, or ``None`` when unset.

        Returns
        -------
        The value unchanged.

        Raises
        ------
        ValueError
            If ``v`` is not compatible with nanometers, or is negative.
        """
        if v is None:
            return v

        # The padding is a length, so it must be expressible in nanometers.
        if not v.is_compatible_with(unit.nanometer):
            raise ValueError("solvent_padding must be in distance units (i.e. nanometers)")

        # Only negative values are rejected; a padding of exactly zero passes.
        if v < 0:
            errmsg = "solvent_padding must be a positive value"
            raise ValueError(errmsg)

        return v

    @field_validator("number_of_solvent_molecules")
    @classmethod
    def positive_solvent_number(cls, v):
        """Check that ``number_of_solvent_molecules`` is strictly positive.

        Parameters
        ----------
        v
            The ``number_of_solvent_molecules`` value, or ``None`` when unset.

        Returns
        -------
        The value unchanged.

        Raises
        ------
        ValueError
            If ``v`` is zero or negative.
        """
        if v is None:
            return v

        if v <= 0:
            # Name the field exactly as it is spelled on the settings class.
            errmsg = f"number_of_solvent_molecules: {v} must be positive"
            raise ValueError(errmsg)

        return v

    @field_validator("box_size")
    @classmethod
    def box_size_properties(cls, v):
        """Check that ``box_size`` is a 1-D array of three lengths.

        Only the array shape is checked here; the individual values are
        not inspected.

        Parameters
        ----------
        v
            The ``box_size`` value, or ``None`` when unset.

        Returns
        -------
        The value unchanged.

        Raises
        ------
        ValueError
            If ``v`` is set and its shape is not ``(3,)``.
        """
        if v is None:
            return v

        if v.shape != (3,):
            errmsg = f"box_size must be a 1-D array of length 3 got {v} with shape {v.shape}"
            raise ValueError(errmsg)

        return v
class BasePartialChargeSettings(SettingsBaseModel):
    """
    Base class for partial charge assignment.

    Declares no fields of its own; it only sets the pydantic model
    configuration. Concrete partial charge settings (e.g.
    ``OpenFFPartialChargeSettings``) derive from this class.
    """

    # Allow fields whose types pydantic has no built-in schema for.
    model_config = ConfigDict(arbitrary_types_allowed=True)
class OpenFFPartialChargeSettings(BasePartialChargeSettings):
"""
Settings for controlling partial charge assignment using the OpenFF tooling
"""
partial_charge_method: Literal["am1bcc", "am1bccelf10", "nagl", "espaloma"] = "am1bcc"
"""
Selection of method for partial charge generation.
Description of options
----------------------
``am1bcc``:
Generate partial charges using the AM1-BCC approach, as detailed
by Araz Jakalian et al. J. Comp. Chem. 2000.
AM1-BCC charges are either assigned using AmberTools (via SQM)
if ``off_toolkit_backend`` is set to ``ambertools``, or
using the OpenEye Toolkit (via Quacpac) if ``off_toolkit_backend``
is set to ``openeye``. A maximum of one conformer is allowed.
``am1bccelf10``:
Assign AM1-BCC partial charges using the `ELF10 method
`_
This is only currently possible via the OpenEye toolkit
if setting ``off_toolkit_backend`` to ``openeye``.
We recommend setting ``number_of_conformers`` to at least `500`.
``nagl``:
Assign partial charges using the `OpenFF NAGL ML method
`_
All ``off_toolkit_backend`` options are supported.
A maximum of one conformer is allowed.
``espaloma``:
Assign partial charges using the `Espaloma Charge method