Copy disabled (too large)
Download .txt
Showing preview only (12,977K chars total). Download the full file to get everything.
Repository: bytedance/dplm
Branch: main
Commit: 8a2e15e53416
Files: 319
Total size: 12.3 MB
Directory structure:
gitextract_uly26sb1/
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── analysis/
│ ├── TMalign
│ ├── TMalign.cpp
│ ├── TMscore
│ ├── TMscore.cpp
│ ├── cal_plddt_dir.py
│ ├── cal_tmscore.py
│ ├── motif_analysis.ipynb
│ ├── plddt_calculate.sh
│ ├── plot.ipynb
│ └── uncond_analysis.ipynb
├── configs/
│ ├── callbacks/
│ │ ├── default.yaml
│ │ ├── fixedbb.yaml
│ │ ├── lm.yaml
│ │ └── structok.yaml
│ ├── config.yaml
│ ├── datamodule/
│ │ ├── cath_4.3.yaml
│ │ ├── pdb.yaml
│ │ ├── tokenized_protein.yaml
│ │ ├── uniref50.yaml
│ │ └── uniref50_hf.yaml
│ ├── experiment/
│ │ ├── base.yaml
│ │ ├── dplm/
│ │ │ ├── cond_dplm_150m.yaml
│ │ │ ├── cond_dplm_3b.yaml
│ │ │ ├── cond_dplm_650m.yaml
│ │ │ ├── dplm_150m.yaml
│ │ │ ├── dplm_150m_ds.yaml
│ │ │ ├── dplm_150m_stage2.yaml
│ │ │ ├── dplm_15b_ds.yaml
│ │ │ ├── dplm_30b_ds.yaml
│ │ │ ├── dplm_3b.yaml
│ │ │ ├── dplm_3b_ds.yaml
│ │ │ ├── dplm_3b_stage2.yaml
│ │ │ ├── dplm_650m.yaml
│ │ │ ├── dplm_650m_ds.yaml
│ │ │ ├── dplm_650m_stage2.yaml
│ │ │ └── mlm_150m.yaml
│ │ ├── dplm2/
│ │ │ ├── dplm2_150m.yaml
│ │ │ ├── dplm2_3b.yaml
│ │ │ ├── dplm2_650m.yaml
│ │ │ ├── dplm2_650m_selfmixup.yaml
│ │ │ └── dplm2_bit_650m.yaml
│ │ └── structok/
│ │ ├── inference/
│ │ │ ├── forward_folding.yaml
│ │ │ ├── inverse_folding.yaml
│ │ │ ├── reconstruction.yaml
│ │ │ ├── unconditional.yaml
│ │ │ └── unconditional_codesign.yaml
│ │ └── structok_lfq_8k_pdb_swissprot_c512.yaml
│ ├── hydra/
│ │ └── default.yaml
│ ├── logger/
│ │ ├── tensorboard.yaml
│ │ └── wandb.yaml
│ ├── paths/
│ │ └── default.yaml
│ ├── test.yaml
│ └── trainer/
│ ├── ddp.yaml
│ ├── ddp_bf16.yaml
│ ├── ddp_fp16.yaml
│ ├── deepspeed_zero2.yaml
│ ├── deepspeed_zero2_bf16.yaml
│ ├── deepspeed_zero2_fp16.yaml
│ ├── deepspeed_zero2_offload.yaml
│ ├── deepspeed_zero3.yaml
│ ├── deepspeed_zero3_bf16.yaml
│ └── default.yaml
├── env.yml
├── generate_dplm.py
├── generate_dplm2.py
├── requirements.txt
├── run/
│ ├── scaffold_generate_dplm.py
│ └── scaffold_generate_dplm2.py
├── scripts/
│ ├── download_cath.sh
│ ├── download_metadata.sh
│ ├── download_motif_scaffolds.sh
│ ├── download_pdb_swissprot_hf.sh
│ ├── download_uniref50_hf.sh
│ └── install.sh
├── setup.cfg
├── setup.py
├── src/
│ └── byprot/
│ ├── __init__.py
│ ├── datamodules/
│ │ ├── __init__.py
│ │ ├── cath_datamodule.py
│ │ ├── dataset/
│ │ │ ├── __init__.py
│ │ │ ├── cath.py
│ │ │ ├── data_utils.py
│ │ │ ├── tokenized_protein.py
│ │ │ ├── uniref.py
│ │ │ └── uniref_hf.py
│ │ ├── pdb_dataset/
│ │ │ ├── __init__.py
│ │ │ ├── all_atom.py
│ │ │ ├── pdb_datamodule.py
│ │ │ ├── protein.py
│ │ │ ├── residue_constants.py
│ │ │ └── utils.py
│ │ ├── tokenized_protein_datamodule.py
│ │ ├── uniref50.py
│ │ └── uniref50_hf.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── dplm/
│ │ │ ├── __init__.py
│ │ │ ├── dplm.py
│ │ │ ├── dplm_invfold.py
│ │ │ └── modules/
│ │ │ ├── dplm_adapter.py
│ │ │ ├── dplm_modeling_esm.py
│ │ │ └── gvp_transformer_encoder.py
│ │ ├── dplm2/
│ │ │ ├── __init__.py
│ │ │ ├── dplm2.py
│ │ │ ├── dplm2_bit.py
│ │ │ └── modules/
│ │ │ ├── dplm2_bit_modeling_esm.py
│ │ │ └── dplm2_modeling_esm.py
│ │ ├── structok/
│ │ │ ├── modules/
│ │ │ │ ├── ema.py
│ │ │ │ ├── folding_utils/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── categorical_mixture.py
│ │ │ │ │ ├── decoder.py
│ │ │ │ │ ├── esmfold.py
│ │ │ │ │ ├── misc.py
│ │ │ │ │ ├── pretrained.py
│ │ │ │ │ ├── structure_module.py
│ │ │ │ │ ├── tri_self_attn_block.py
│ │ │ │ │ └── trunk.py
│ │ │ │ ├── gvp_encoder.py
│ │ │ │ ├── lfq.py
│ │ │ │ ├── loss.py
│ │ │ │ ├── nn.py
│ │ │ │ └── vqvae.py
│ │ │ └── structok_lfq.py
│ │ └── utils.py
│ ├── modules/
│ │ ├── __init__.py
│ │ ├── cross_entropy.py
│ │ ├── metrics.py
│ │ └── protein_metrics.py
│ ├── tasks/
│ │ ├── __init__.py
│ │ ├── lm/
│ │ │ ├── dplm.py
│ │ │ ├── dplm2.py
│ │ │ ├── dplm_invfold.py
│ │ │ └── mlm.py
│ │ └── struct_tokenizer/
│ │ └── structok.py
│ ├── testing_pipeline.py
│ ├── training_pipeline.py
│ └── utils/
│ ├── __init__.py
│ ├── callbacks.py
│ ├── config.py
│ ├── io.py
│ ├── logger.py
│ ├── lr_scheduler.py
│ ├── optim.py
│ ├── protein/
│ │ ├── __init__.py
│ │ ├── all_atom.py
│ │ ├── evaluator_dplm2.py
│ │ ├── folding_model.py
│ │ ├── residue_constants.py
│ │ ├── tokenize_pdb.py
│ │ └── utils.py
│ ├── registry.py
│ ├── scaffold_utils.py
│ └── strategies.py
├── test.py
├── train.py
└── vendor/
└── openfold/
├── CITATION.cff
├── Dockerfile
├── LICENSE
├── README.md
├── deepspeed_config.json
├── environment.yml
├── notebooks/
│ ├── OpenFold.ipynb
│ └── environment.yml
├── openfold/
│ ├── __init__.py
│ ├── config.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── data_modules.py
│ │ ├── data_pipeline.py
│ │ ├── data_transforms.py
│ │ ├── errors.py
│ │ ├── feature_pipeline.py
│ │ ├── input_pipeline.py
│ │ ├── mmcif_parsing.py
│ │ ├── parsers.py
│ │ ├── templates.py
│ │ └── tools/
│ │ ├── __init__.py
│ │ ├── hhblits.py
│ │ ├── hhsearch.py
│ │ ├── jackhmmer.py
│ │ ├── kalign.py
│ │ └── utils.py
│ ├── model/
│ │ ├── __init__.py
│ │ ├── dropout.py
│ │ ├── embedders.py
│ │ ├── evoformer.py
│ │ ├── heads.py
│ │ ├── model.py
│ │ ├── msa.py
│ │ ├── outer_product_mean.py
│ │ ├── pair_transition.py
│ │ ├── primitives.py
│ │ ├── structure_module.py
│ │ ├── template.py
│ │ ├── torchscript.py
│ │ ├── triangular_attention.py
│ │ └── triangular_multiplicative_update.py
│ ├── np/
│ │ ├── __init__.py
│ │ ├── protein.py
│ │ ├── relax/
│ │ │ ├── __init__.py
│ │ │ ├── amber_minimize.py
│ │ │ ├── cleanup.py
│ │ │ ├── relax.py
│ │ │ └── utils.py
│ │ └── residue_constants.py
│ ├── resources/
│ │ ├── __init__.py
│ │ └── stereo_chemical_props.txt
│ └── utils/
│ ├── __init__.py
│ ├── argparse.py
│ ├── callbacks.py
│ ├── checkpointing.py
│ ├── chunk_utils.py
│ ├── exponential_moving_average.py
│ ├── feats.py
│ ├── import_weights.py
│ ├── kernel/
│ │ ├── __init__.py
│ │ ├── attention_core.py
│ │ └── csrc/
│ │ ├── compat.h
│ │ ├── softmax_cuda.cpp
│ │ ├── softmax_cuda_kernel.cu
│ │ └── softmax_cuda_stub.cpp
│ ├── logger.py
│ ├── loss.py
│ ├── lr_schedulers.py
│ ├── precision_utils.py
│ ├── rigid_utils.py
│ ├── script_utils.py
│ ├── seed.py
│ ├── superimposition.py
│ ├── suppress_output.py
│ ├── tensor_utils.py
│ ├── trace_utils.py
│ └── validation_metrics.py
├── run_pretrained_openfold.py
├── scripts/
│ ├── activate_conda_env.sh
│ ├── alignment_db_scripts/
│ │ ├── create_alignment_db.py
│ │ └── unify_alignment_db_indices.py
│ ├── build_deepspeed_config.py
│ ├── colabfold_search.sh
│ ├── convert_of_weights_to_jax.py
│ ├── data_dir_to_fasta.py
│ ├── deactivate_conda_env.sh
│ ├── download_alphafold_dbs.sh
│ ├── download_alphafold_params.sh
│ ├── download_bfd.sh
│ ├── download_cameo.py
│ ├── download_colabfold_envdb.sh
│ ├── download_mgnify.sh
│ ├── download_mmseqs_dbs.sh
│ ├── download_openfold_params.sh
│ ├── download_openfold_params_gdrive.sh
│ ├── download_openfold_params_huggingface.sh
│ ├── download_pdb70.sh
│ ├── download_pdb_mmcif.sh
│ ├── download_roda_pdbs.sh
│ ├── download_small_bfd.sh
│ ├── download_uniclust30.sh
│ ├── download_uniref30.sh
│ ├── download_uniref90.sh
│ ├── flatten_roda.sh
│ ├── generate_alphafold_feature_dict.py
│ ├── generate_chain_data_cache.py
│ ├── generate_mmcif_cache.py
│ ├── install_hh_suite.sh
│ ├── install_third_party_dependencies.sh
│ ├── precompute_alignments.py
│ ├── precompute_alignments_mmseqs.py
│ ├── precompute_embeddings.py
│ ├── prep_mmseqs_dbs.sh
│ ├── prep_proteinnet_msas.py
│ ├── run_unit_tests.sh
│ ├── slurm_scripts/
│ │ └── run_uniclust30_search.sh
│ ├── unpack_proteinnet.py
│ ├── utils.py
│ ├── vars.sh
│ └── zero_to_fp32.py
├── setup.py
├── tests/
│ ├── __init__.py
│ ├── compare_utils.py
│ ├── config.py
│ ├── data_utils.py
│ ├── test_data/
│ │ ├── alignments/
│ │ │ ├── bfd_uniclust_hits.a3m
│ │ │ ├── mgnify_hits.sto
│ │ │ ├── pdb70_hits.hhr
│ │ │ └── uniref90_hits.sto
│ │ ├── alphafold_feature_dict.pickle
│ │ ├── features.pkl
│ │ ├── mmcifs/
│ │ │ ├── 1hf9.cif
│ │ │ ├── 1psm.cif
│ │ │ ├── 2crb.cif
│ │ │ ├── 2q2k.cif
│ │ │ ├── 3u8v.cif
│ │ │ ├── 3zee.cif
│ │ │ ├── 4i6p.cif
│ │ │ ├── 4zey.cif
│ │ │ └── 5kc1.cif
│ │ └── short.fasta
│ ├── test_data_pipeline.py
│ ├── test_data_transforms.py
│ ├── test_embedders.py
│ ├── test_evoformer.py
│ ├── test_feats.py
│ ├── test_import_weights.py
│ ├── test_kernels.py
│ ├── test_loss.py
│ ├── test_model.py
│ ├── test_msa.py
│ ├── test_outer_product_mean.py
│ ├── test_pair_transition.py
│ ├── test_primitives.py
│ ├── test_structure_module.py
│ ├── test_template.py
│ ├── test_triangular_attention.py
│ ├── test_triangular_multiplicative_update.py
│ └── test_utils.py
├── thread_sequence.py
└── train_openfold.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
### VisualStudioCode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
**/.vscode
# JetBrains
.idea/
# Lightning-Hydra-Template
configs/local/default.yaml
configs/local/*
!*/data
/data/
logs/
wandb/
.env
.autoenv
workspace.ipynb
run/logs
# model weight
*.ckpt
# pdb
*.pdb
byprot-checkpoints/
generation-results/
data-bin/scaffolding-pdbs/*.pdb
temp.ipynb
# run/
================================================
FILE: .pre-commit-config.yaml
================================================
default_language_version:
python: python3.9
exclude: ^(docs/|build/|node_modules/|venv/|\.venv/|vendor/)
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
# list of supported hooks: https://pre-commit.com/hooks.html
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-docstring-first
- id: check-yaml
- id: debug-statements
- id: detect-private-key
- id: check-toml
- id: check-case-conflict
- id: check-added-large-files
args: [--maxkb=1000]
- id: check-json
- id: check-merge-conflict
- id: check-shebang-scripts-are-executable
- id: fix-byte-order-marker
- id: fix-encoding-pragma
args: [--remove]
- id: mixed-line-ending
args: [--fix=lf]
# python code formatting
- repo: https://github.com/psf/black
rev: 22.6.0
hooks:
- id: black
args: [--line-length, "79"]
language_version: python3.9
# python import sorting
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
args:
[
"--line-length=79",
"--multi-line=3",
"--profile=black",
"--filter-files",
]
language_version: python3.9
# python upgrading syntax to newer version
# - repo: https://github.com/asottile/pyupgrade
# rev: v2.32.1
# hooks:
# - id: pyupgrade
# args: [--py38-plus]
# python docstring formatting
- repo: https://github.com/myint/docformatter
rev: v1.4
hooks:
- id: docformatter
args: [--in-place, --wrap-summaries=79, --wrap-descriptions=79]
# python check (PEP8), programming errors and code complexity
- repo: https://github.com/PyCQA/flake8
rev: 4.0.1
hooks:
- id: flake8
args:
[
"--extend-ignore",
"E203,E402,E501,F401,F841",
"--exclude",
"logs/*,data/*",
]
# python security linter
- repo: https://github.com/PyCQA/bandit
rev: "1.7.1"
hooks:
- id: bandit
args: ["-s", "B101"]
# yaml formatting
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v2.7.1
hooks:
- id: prettier
types: [yaml]
# shell scripts linter
- repo: https://github.com/shellcheck-py/shellcheck-py
rev: v0.8.0.4
hooks:
- id: shellcheck
# # md formatting
# - repo: https://github.com/executablebooks/mdformat
# rev: 0.7.14
# hooks:
# - id: mdformat
# args: ["--number"]
# additional_dependencies:
# - mdformat-gfm
# - mdformat-tables
# - mdformat_frontmatter
# # - mdformat-toc
# # - mdformat-black
# jupyter notebook cell output clearing
# - repo: https://github.com/kynan/nbstripout
# rev: 0.5.0
# hooks:
# - id: nbstripout
# jupyter notebook linting
# - repo: https://github.com/nbQA-dev/nbQA
# rev: 1.4.0
# hooks:
# - id: nbqa-black
# args: ["--line-length=79"]
# - id: nbqa-isort
# args: ["--profile=black"]
# - id: nbqa-flake8
# args:
# [
# "--extend-ignore=E203,E402,E501,F401,F841",
# "--exclude=logs/*,data/*",
# ]
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
<!-- <div align="center"> -->
<!-- omit in toc -->
# The Family of Diffusion Protein Language Models (DPLM)
<a href="https://pytorch.org/get-started/locally/"><img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-ee4c2c?logo=pytorch&logoColor=white"></a>
<a href="https://pytorchlightning.ai/"><img alt="Lightning" src="https://img.shields.io/badge/-Lightning-792ee5?logo=pytorchlightning&logoColor=white"></a>
<a href="https://hydra.cc/"><img alt="Config: Hydra" src="https://img.shields.io/badge/Config-Hydra-89b8cd"></a>
<a href="https://github.com/ashleve/lightning-hydra-template"><img alt="Template" src="https://img.shields.io/badge/-Lightning--Hydra--Template-017F2F?style=flat&logo=github&labelColor=gray"></a><br>
## Overview 🌟
This repository contains the official implementation of training and inference as well as the pre-trained weights for the Family of Diffusion Protein Language Models (DPLM), including:
- `DPLM` from ICML'24 paper ["Diffusion Language Models Are Versatile Protein Learners"](https://arxiv.org/abs/2402.18567), which introduces **d**iffusion **p**rotein **l**anguage **m**odel (DPLM), a versatile protein language model that demonstrates strong generative and predictive capabilities for protein sequences.
- `DPLM-2` from ICLR'25 paper ["DPLM-2: A Multimodal Diffusion Protein Language Model"](https://arxiv.org/abs/2410.13782), a multimodal protein foundation model that extends discrete diffusion protein language model to accommodate both sequences and structures.
- ICML'25 spotlight paper ["Elucidating the Design Space of Multimodal Protein Language Models"](https://arxiv.org/abs/2504.11454), where we elucidate the challenges of structure modeling of multimodal protein language models (e.g., DPLM-2 and ESM3) and propose advanced designs for better structure modeling. We have released the finer-grained bit-based generative modeling (`DPLM-2 Bit`). The full implementation of the paper will be released soon.
## Key Features 🔑
Specifically, the DPLM family exhibits impressive performance in protein (structure and sequence) co-generation, any-to-any conditional generation (e.g., folding, inverse folding, and motif scaffolding), and representation learning.
We develop DPLM based on the [ByProt](https://github.com/BytedProtein/ByProt). This repository contains pretraining scripts for DPLM and running scripts for various protein generation and understanding tasks, as detailed below:
- **Unconditional protein generation**:
**DPLM** is capable of unconditionally generating protein sequences with reasonable predicted structures. **DPLM-2** can generate diverse and highly plausible proteins through simultaneous structure-sequence co-generation.
- **Sequence-conditioned generation (forward folding)**:
DPLM-2 can generate reasonable protein structures given an input protein sequence, achieving performance close to that of strong folding models (e.g., ESMFold).
- **Structure-conditioned generation (inverse folding)**:
DPLM and DPLM-2 can produce sequences that can confidently fold into the given backbone structure.
- **Motif scaffolding**:
DPLM can generate reasonable scaffold sequences given specific functional motifs. DPLM-2 achieves more successful motif scaffolding through multimodal motif conditioning.
- **Representation learning**:
DPLM is a superior protein sequence representation learner, while DPLM-2 offers structure-aware protein representations, demonstrating impressive performance across a variety of protein predictive tasks.
- **Controllable generation**:
DPLM enjoys plug-and-play programmability, generating samples satisfying provided secondary structure annotations.
**TODOs**
- [ ] Controllable/guided generation with discrete diffusion classifier guidance.
- [ ] Representation learning of DPLM-2
## DPLM
> ["Diffusion Language Models Are Versatile Protein Learners." Wang et al., In ICML 2024](https://arxiv.org/abs/2402.18567)

## DPLM-2
> ["DPLM-2: A Multimodal Diffusion Protein Language Model." Wang et al., In ICLR 2025](https://arxiv.org/abs/2410.13782)

## Updates 📢
- **[2025-07]** We update the default sampling strategy of **DPLM-2** to `annealing@2.0:0.1`.
- **[2025-04]** Our latest work **DPLM-2.1**, which focuses on analysis and better protein structure modeling of multimodal protein language models, is accepted to ICML'25 Spotlight! Check [Elucidating the Design Space of Multimodal Protein Language Models](https://arxiv.org/abs/2504.11454). We have released the implementation of finer-grained and better structure modeling (**DPLM-2 Bit**). The full implementation will be released soon.
- **[2024-10]** Check out our new work [DPLM-2](https://arxiv.org/abs/2410.13782), a multimodal protein foundation model that extends DPLM to simultaneously model, understand, and generate both sequences and structures!
- **[2024-03]** We release [DPLM](https://arxiv.org/abs/2402.18567), a versatile protein language model that demonstrates strong generative and predictive capabilities for protein sequences!
## Table of Contents 📚
- [Quick Start](#quick-start)
- [Installation](#installation)
- [Load Pretrained Models](#load-pretrained-models)
- [Generation Examples](#generation-examples)
- [Model Checkpoints](#model-checkpoints)
- [Advanced Usage](#advanced-usage)
- [Training](#training)
- [Unconditional protein (co-)generation](#unconditional-protein-co-generation)
- [Protein sequence generation (DPLM)](#protein-sequence-generation-dplm)
- [Protein sequence-structure co-generation (DPLM-2 & DPLM-2-Bit)](#protein-sequence-structure-co-generation-dplm-2--dplm-2-bit)
- [Sequence-conditioned Generation: Forward Folding](#sequence-conditioned-generation-forward-folding)
- [Structure-conditioned generation: inverse folding](#structure-conditioned-generation-inverse-folding)
- [Motif scaffolding](#motif-scaffolding)
- [Representation Learning](#representation-learning)
- [Acknowledgements](#acknowledgements)
- [Citation](#citation)
# Quick Start
## Installation
```bash
# clone project
git clone --recursive https://url/to/this/repo/dplm.git
cd dplm
# create conda virtual environment
env_name=dplm
conda create -n ${env_name} python=3.9 pip
conda activate ${env_name}
# automatically install everything else
bash scripts/install.sh
```
## Load Pretrained Models
Users can load DPLM/DPLM-2 checkpoints by:
```python
from byprot.models.dplm import DiffusionProteinLanguageModel as DPLM
from byprot.models.dplm2 import MultimodalDiffusionProteinLanguageModel as DPLM2
from byprot.models.dplm2 import DPLM2Bit
dplm = DPLM.from_pretrained("airkingbd/dplm_650m").cuda()
dplm2 = DPLM2.from_pretrained("airkingbd/dplm2_650m").cuda()
dplm2_bit = DPLM2Bit.from_pretrained("airkingbd/dplm2_bit_650m").cuda()
```
## Generation Examples
**Protein sequence generation**
```python
from generate_dplm import initialize_generation
input_tokens = initialize_generation(
length=200,
num_seqs=5,
tokenizer=dplm.tokenizer,
device=next(dplm.parameters()).device
)
samples = dplm.generate(
input_tokens=input_tokens,
max_iter=500,
)
print([''.join(seq.split(' ')) for seq in dplm.tokenizer.batch_decode(samples, skip_special_tokens=True)])
```
**Protein sequence-structure co-generation**
Users can check the generated sequences and structures in the `./generation-results` folder.
```python
from generate_dplm2 import initialize_generation, save_results
input_tokens = initialize_generation(
task="co_generation",
length=200,
num_seqs=5,
tokenizer=dplm2.tokenizer,
device=next(dplm2.parameters()).device
)[0]
samples = dplm2.generate(
input_tokens=input_tokens,
max_iter=500,
)
save_results(
outputs=samples,
task="co_generation",
save_dir="./generation-results/dplm2_generation",
tokenizer=dplm2.tokenizer,
struct_tokenizer=dplm2.struct_tokenizer, save_pdb=True
)
samples = dplm2_bit.generate(
input_tokens=input_tokens,
max_iter=500,
)
save_results(
outputs=samples,
task="co_generation",
save_dir="./generation-results/dplm2_bit_generation",
tokenizer=dplm2_bit.tokenizer,
struct_tokenizer=dplm2_bit.struct_tokenizer
)
```
## Model Checkpoints
Access pretrained models in varying sizes:
| Model name | Model size |
| ------------------------------------------------------------ | --------------- |
| [dplm-150m](https://huggingface.co/airkingbd/dplm_150m/tree/main) | 150M parameters |
| [dplm-650m](https://huggingface.co/airkingbd/dplm_650m/tree/main) | 650M parameters |
| [dplm-3b](https://huggingface.co/airkingbd/dplm_3b/tree/main) | 3B parameters |
| [dplm2-150m](https://huggingface.co/airkingbd/dplm2_150m/tree/main) | 150M parameters |
| [dplm2-650m](https://huggingface.co/airkingbd/dplm2_650m/tree/main) | 650M parameters |
| [dplm2-3b](https://huggingface.co/airkingbd/dplm2_3b/tree/main) | 3B parameters |
| [dplm2-bit-650m](https://huggingface.co/airkingbd/dplm2_bit_650m/tree/main) | 650M parameters |
# Advanced Usage
## Training
<!-- omit in toc -->
### DPLM
<!-- omit in toc -->
#### Dataset
We pretrain DPLM on the UniRef50 dataset, which contains about 42 million protein sequences. We obtain the preprocessed UniRef50 dataset provided by [EvoDiff (Alamdari et al, 2023)](https://www.biorxiv.org/content/10.1101/2023.09.11.556673v1), which can be downloaded from this [link](https://zenodo.org/record/6564798). After downloading, please place the dataset in the `./data-bin/uniref50` folder.
We also provide the preprocessed dataset in [HuggingFace datasets](https://huggingface.co/datasets/airkingbd/uniref50) format, which we recommend to use. User can download the HF dataset locally in advance for faster loading by:
```bash
bash scripts/download_uniref50_hf.sh
```
<!-- omit in toc -->
#### Example of training
We train DPLM with approximately 1 million tokens per batch for 100,000 training steps.
The following command is run on one node with 8 A100 GPUs. If you want to train on multiple nodes, you can adjust the total number of tokens by ensuring that `max_tokens` \* `accumulate_grad_batches`\*`#GPUs` is approximately 1 million.
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
max_tokens=8192
accumulate_grad_batches=16
# this means the effective batch size is #GPUs(8) * max_tokens(8192) * accumulate_grad_batches(16), resulting in approximately 1 million.
exp=dplm/dplm_650m
model_name=dplm_650m
python train.py \
experiment=${exp} name=${model_name} \
datamodule.max_tokens=${max_tokens} \
trainer.accumulate_grad_batches=${accumulate_grad_batches}
```
You can adjust the other training configurations in the `configs/experiment/dplm/dplm_650m.yaml` as needed.
<!-- omit in toc -->
### DPLM-2
<!-- omit in toc -->
#### Dataset
We use the experimental structures from [PDB](https://pubmed.ncbi.nlm.nih.gov/10592235/) and AF2-predicted structures from [SwissProt](https://academic.oup.com/nar/article/50/D1/D439/6430488) dataset as training data for DPLM-2. We provide a preprocessed [HuggingFace dataset](https://huggingface.co/datasets/airkingbd/pdb_swissprot) of PDB and SwissProt. User can download the HF dataset locally in advance for faster loading by:
```bash
bash scripts/download_pdb_swissprot.sh
```
<!-- omit in toc -->
#### Example of training
As noted in section 3.2 in [DPLM-2](https://arxiv.org/abs/2410.13782) paper, we propose an efficient warm-up training strategy to mitigate the scarcity of structure training data. During training, we initialize the DPLM-2 model with pretrained DPLM checkpoint, to leverage the evolutionary knowledge captured by sequence-based pLM during large-scale sequence pretraining, which is beneficial for structure modeling.
We train DPLM-2 with approximately 64,000 tokens per batch for 100,000 training steps. To preserve the evolutionary knowledge captured by DPLM, we use [LoRA](https://github.com/huggingface/peft) to prevent large parameter shifts. The training command is as follows:
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
max_tokens=8192
accumulate_grad_batches=1
# this means the effective batch size is #GPUs(8) * max_tokens(8192) * accumulate_grad_batches(1), resulting in approximately 64 thousand.
exp=dplm2/dplm2_650m
model_name=dplm2_650m
python train.py \
experiment=${exp} name=${model_name} \
datamodule.max_tokens=${max_tokens} \
trainer.accumulate_grad_batches=${accumulate_grad_batches}
```
<!-- omit in toc -->
### DPLM-2 Bit-based Modeling
In our latest work [DPLM-2.1](https://arxiv.org/abs/2504.11454), we show that the index-based structure token is challenging for the model to predict. A finer-grained, bit-based modeling approach in the latent space (i.e., predicting each bit of the quantized structure feature instead of the index) leads to better structural modeling and generation performance.
The training dataset is the same as for DPLM-2, and the training command is as follows:
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
max_tokens=8192
accumulate_grad_batches=1
# this means the effective batch size is #GPU(8) * max_tokens(8192) * accumulate_grad_batches(1), resulting in approximately 64 thousand.
exp=dplm2/dplm2_bit_650m
model_name=dplm2_bit_650m
python train.py \
experiment=${exp} name=${model_name} \
datamodule.max_tokens=${max_tokens} \
trainer.accumulate_grad_batches=${accumulate_grad_batches}
```
## Unconditional protein (co-)generation
### Protein sequence generation (DPLM)
The results of unconditional protein sequence generation of DPLM of different scales (150M, 650M, 3B) are shown in the table below. For more details, please refer to our paper.
| Length | 100 | 200 | 300 | 400 | 500 | 600 | 700 | 800 | 900 | 1000 |
| ------ | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | -------------- | -------------- |
| 150M | 73.31 | 84.30 | 84.82 | 86.90 | 81.71 | 81.53 | 81.56 | 80.92 | 78.71 | 72.10 |
| 650M | 74.00 (+0.69) | 85.61 (+1.31) | 85.91 (+1.09) | 88.16 (+1.26) | 82.58 (+0.87) | 84.38 (+2.85) | 83.87 (+2.31) | 83.00 (+2.08) | 84.92 (+6.21) | 81.51 (+9.41) |
| 3B | 77.78 (+4.47) | 86.16 (+1.86) | 87.39 (+2.57) | 90.06 (+3.16) | 87.43 (+5.72) | 86.01 (+4.48) | 84.64 (+3.08) | 85.88 (+4.96) | 85.93 (+7.22) | 83.86 (+11.76) |
To generate new protein sequences using a pre-trained DPLM model:
```bash
model_name=dplm_650m # choose from dplm_150m, dplm_650m, dplm_3b
output_dir=generation-results/${model_name}/uncond_generation
mkdir -p generation-results
python generate_dplm.py --model_name airkingbd/${model_name} \
--seq_lens 100 200 300 400 500 \
--saveto ${output_dir}
# Evaluation
bash analysis/plddt_calculate.sh ${output_dir} # compute pLDDT using ESMFold
```
We also provide evaluation scripts in the `analysis` folder. Users can use the `analysis/uncond_analysis.ipynb` to obtain average pLDDT score of each length and draw the line chart of the pLDDT score.
### Protein sequence-structure co-generation (DPLM-2 & DPLM-2-Bit)
DPLM-2 can generate diverse and highly plausible proteins with simultaneous structure-sequence co-generation.
<!--  -->
<img src="./assets/co_generation.png" alt="Descriptive text for your image" width="400">
User can co-generate sequence and structure simultaneously with the command below:
```bash
# choose from dplm2_150m, dplm2_650m, dplm2_3b
model_name=dplm2_650m
# About the default sampling strategy, annealing@2.0:0.1,
# which anneals the temperature from 2.0 to 0.1.
# It begins with high randomness to maximize diversity
# and concludes with low randomness to ensure designability.
# This achieves a better trade-off between the quality and diversity.
sampling_strategy=annealing@2.0:0.1
output_dir=generation-results/${model_name}
task=co_generation
mkdir -p ${output_dir}
python generate_dplm2.py \
--model_name airkingbd/${model_name} \
--task ${task} \
--sampling_strategy ${sampling_strategy} \
--num_seqs 50 \
--max_iter 500 \
--seq_lens 100 200 300 400 500 \
--saveto ${output_dir}
# Evaluation
input_fasta_dir=${output_dir}/co_generation
python src/byprot/utils/protein/evaluator_dplm2.py -cn unconditional_codesign \
inference.input_fasta_dir=${input_fasta_dir}
```
User can use `analysis/plot.ipynb` to plot the rmsd, tmscore distribution and diversity of each length.
Co-generate sequence and structure with dplm-2 bit modeling variant:
```bash
model_name=dplm2_bit_650m
sampling_strategy=annealing@1.1:0.1
output_dir=generation-results/${model_name}
task=co_generation
mkdir -p ${output_dir}
python generate_dplm2.py \
--model_name airkingbd/${model_name} \
--task ${task} \
--bit_model \
--sampling_strategy ${sampling_strategy} \
--num_seqs 50 \
--max_iter 500 \
--seq_lens 100 200 300 400 500 \
--saveto ${output_dir}
```
## Sequence-conditioned Generation: Forward Folding
DPLM-2 spontaneously enables protein structure prediction given sequence (i.e., folding) in a zero-shot manner.
We use the [CAMEO 2022 (provided by EigenFold)](https://github.com/bjing2016/EigenFold) and a [PDB date split (provided by MultiFlow)](https://github.com/jasonkyuyim/multiflow) as testsets, and we provide our preprocessed dataset in this [link](https://zenodo.org/records/15424801), and can be downloaded by:
```bash
bash scripts/download_metadata.sh
```
Partial results are shown in the table below. For more details, please refer to [DPLM-2.1](https://arxiv.org/abs/2504.11454) paper.
| Models | CAMEO 2022 | | PDB date | |
|---|---|---|---|---|
| | rmsd | tmscore | rmsd | tmscore |
| ESMFold | 3.99 | 0.85 | 2.84 | 0.93 |
| DPLM-2 | 7.70 | 0.79 | 5.30 | 0.83 |
| DPLM-2 Bit | 6.40 | 0.84 | 3.22 | 0.90 |
The folding generation and evaluation script is as follows.
We utilize RMSD and TMscore between the predicted and ground truth structures for evaluation. DPLM-2 adopts argmax decoding for 100 sampling iterations.
```bash
model_name=dplm2_650m
output_dir=generation-results/${model_name}
task=folding
mkdir -p ${output_dir}
input_fasta_path=data-bin/cameo2022/aatype.fasta
python generate_dplm2.py \
--model_name airkingbd/${model_name} \
--task ${task} \
--input_fasta_path ${input_fasta_path} \
--max_iter 100 \
--unmasking_strategy deterministic \
--sampling_strategy argmax \
--saveto ${output_dir}
# Evaluation
input_fasta_dir=${output_dir}/folding
python src/byprot/utils/protein/evaluator_dplm2.py -cn forward_folding inference.input_fasta_dir=${input_fasta_dir}
```
For structure prediction conditioned on other customized sequences, users can input a FASTA file and modify the `input_fasta_path` variable to generate the predicted structure.
## Structure-conditioned generation: inverse folding
DPLM family can perform inverse folding in different ways according to DPLM variant. DPLM performs inverse folding by placing an adapter layer on the top of pLM, similar to [LM-Design](https://github.com/BytedProtein/ByProt). On the other hand, DPLM-2 directly conditions on the tokenized structure tokens to predict the sequence.
<!-- omit in toc -->
### Inverse Folding with DPLM
Partial results on the CATH 4.3 dataset are shown in the table below. For more details, please refer to our paper.
| Models | Trainable Params. | AAR | scTM | pLDDT |
|-----------|-------------------|-----------|----------|-----------|
| LM-Design | 6.3M/650M | 56.49 | 0.85 | 74.89 |
| DPLM-150M | 3.1M/150M | 53.27 | 0.85 | 75.31 |
| DPLM-650M | 6.3M/650M | _56.61_ | _0.86_ | _76.78_ |
| DPLM-3B | 68.2M/3.0B | **58.64** | **0.86** | **76.95** |
<!-- omit in toc -->
#### Data
**Download the preprocessed CATH datasets**
- CATH 4.2 dataset provided by [Generative Models for Graph-Based Protein Design (Ingraham et al, NeurIPS'19)](https://papers.nips.cc/paper/2019/hash/f3a4ff4839c56a5f460c88cce3666a2b-Abstract.html)
- CATH 4.3 dataset provided by [Learning inverse folding from millions of predicted structures (Hsu et al, ICML'22)](https://www.biorxiv.org/content/10.1101/2022.04.10.487779v1)
```bash
bash scripts/download_cath.sh
```
<!-- omit in toc -->
#### Training
We train structure-conditional DPLM based on the [LM-Design](https://github.com/BytedProtein/ByProt) framework, designating the pre-trained protein language model as DPLM. The training script is as below.
```bash
exp=dplm/dplm_650m_invfold
dataset=cath_4.3
name=${dataset}/dplm_650m/invfold
python train.py \
experiment=${exp} datamodule=${dataset} name=${name} \
logger=tensorboard trainer=ddp_fp16
```
<!-- omit in toc -->
#### Evaluation on valid/test datasets
Users can set the `eval_sc` to `true` to calculate the self-consistency TMscore and pLDDT, which will result in a significant evaluation time overhead.
```bash
dataset=cath_4.3
exp_path=${dataset}/dplm_650m/invfold
eval_sc=false
# if set ${eval_sc} to true, the program will calculate the self-consistency
# TMscore and pLDDT during generation,
# thus significantly increasing the evaluation time.
python test.py \
experiment_path=${exp_path} \
data_split=test ckpt_path=best.ckpt mode=predict \
task.generator.max_iter=100 task.generator.eval_sc=${eval_sc}
```
<!-- omit in toc -->
### Inverse Folding with DPLM-2
We provide the CAMEO 2022 and PDB date test set split used in our paper, where the structure has been tokenized and saved to `data-bin/cameo2022/struct.fasta` and `data-bin/PDB_date/struct.fasta`.
User can use the following script to do the inverse folding and evaluation.
```bash
model_name=dplm2_650m
output_dir=generation-results/${model_name}
task=inverse_folding
mkdir -p ${output_dir}
input_fasta_path=data-bin/cameo2022/struct.fasta
python generate_dplm2.py \
--model_name airkingbd/${model_name} \
--task ${task} \
--input_fasta_path ${input_fasta_path} \
--max_iter 100 \
--unmasking_strategy deterministic \
--sampling_strategy argmax \
--saveto ${output_dir}
# Evaluation
input_fasta_dir=${output_dir}/inverse_folding
python src/byprot/utils/protein/evaluator_dplm2.py -cn inverse_folding inference.input_fasta_dir=${input_fasta_dir}
```
For any customized input structure, user can first tokenize the structure with structure tokenizer and save it to a FASTA file using the following script:
```bash
# Tokenize
# each protein is represented by a pdb file
input_pdb_folder=/path/to/your/input/structure
# this will save two fasta files in the ${input_pdb_folder}/tokenized_protein folder:
# 1) struct.fasta, containing the tokenized structure tokens
# 2) aatype.fasta, containing the amino acid tokens.
python src/byprot/utils/protein/tokenize_pdb.py --input_pdb_folder ${input_pdb_folder} --output_dir ${input_pdb_folder}/tokenized_protein
```
Then user can specify the path of generated `struct.fasta` as input and predict the sequence.
## Motif scaffolding
DPLM and DPLM-2 can both perform motif scaffolding. DPLM can condition on the motif sequence and predict the scaffold sequence. DPLM-2 is able to condition on both the sequence and structure of the motif and simultaneously co-generate the sequence and structure of the scaffold part, which leads to better performance.
We examine on the benchmark, provided by [FrameFlow](https://github.com/microsoft/protein-frame-flow/blob/main/motif_scaffolding/benchmark.csv). We use the motif pdb files which are provided by [EvoDiff](https://github.com/microsoft/evodiff/tree/main/examples/scaffolding-pdbs), and we also provide the pdbs and the corresponding structure tokens in this [link](https://zenodo.org/records/15424801). You can download the dataset by
```bash
bash scripts/download_motif_scaffolds.sh
```
For each motif-scaffolding problem, we sample 100 sequences and then calculate the success rate according to two aspects: motif part consistency and overall quality. For motif part consistency, we use motif-RMSD < 1$\AA$ as the success criterion. For overall quality, the assessment varies across different approaches: for the sequence-based method (DPLM) we use pLDDT > 70, while for the co-generation method (DPLM-2) we use scTM > 0.8. For more details, please refer to our paper.
The success rate of each motif-scaffold problem is shown below.
| | Pass rate | Avg. Success rate | 1BCF | 1PRW | 1QJG | 1YCR | 2KL8 | 3IXT | 4JHW | 4ZYP | 5IUS | 5TPN | 5TRV_long | 5TRV_med | 5TRV_short | 5WN9 | 5YUI | 6E6R_long | 6E6R_med | 6E6R_short | 6EXZ_long | 6EXZ_med | 6EXZ_short | 7MRX_long | 7MRX_med | 7MRX_short |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| DPLM | 11/24 | 0.19 | 0.00 | 0.83 | 0.00 | 0.38 | 0.08 | 0.17 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.65 | 0.94 | 0.87 | 0.01 | 0.00 | 0.00 | 0.02 | 0.31 | 0.34 |
| DPLM-2 | 18/24 | 0.29 | 0.01 | 0.84 | 0.02 | 0.53 | 0.57 | 0.41 | 0.00 | 0.10 | 0.00 | 0.00 | 0.00 | 0.02 | 0.03 | 0.00 | 0.00 | 0.78 | 0.77 | 0.64 | 0.44 | 0.55 | 0.58 | 0.20 | 0.22 | 0.24 |
<!-- omit in toc -->
### DPLM
We provide the following script to sample sequences for each motif-scaffolding problem. Note that before generation, you should download the motif pdbs and place them in the `data-bin/scaffolding-pdbs` folder.
```bash
export CUDA_VISIBLE_DEVICES=0
model_name=dplm_650m
output_dir=./generation-results/${model_name}/motif_scaffold
mkdir -p generation-results
# Generate scaffold
python run/scaffold_generate_dplm.py \
--model_name airkingbd/${model_name} \
--num_seqs 100 \
--saveto $output_dir
# Predict structure by ESMFold
max_tokens=1024
pdb_path=$output_dir/scaffold_fasta/esmfold_pdb
# folding
mkdir -p $pdb_path
echo 'folding by ESMFold'
output_filename_list=$(ls ${output_dir}/scaffold_fasta)
echo $output_filename_list
python analysis/cal_plddt_dir.py -i ${output_dir}/scaffold_fasta -o ${pdb_path} --max-tokens-per-batch ${max_tokens}
```
For evaluation, users can use the `analysis/motif_analysis.ipynb` to obtain success rate of each problem.
<!-- omit in toc -->
### DPLM-2
Before generation, the FASTA file of tokenized structure tokens and amino acid tokens of the motif should be in the `data-bin/scaffolding-pdbs` folder. Users can co-generate the scaffold sequence and structure, conditioning on the sequence and structure of the motif part.
```bash
export CUDA_VISIBLE_DEVICES=0
model_name=dplm2_650m
output_dir=./generation-results/${model_name}/motif_scaffold
mkdir -p generation-results
# Generate scaffold
python run/scaffold_generate_dplm2.py \
--model_name airkingbd/${model_name} \
--num_seqs 100 \
--saveto ${output_dir}
# Predict structure by ESMFold
max_tokens=1024
python analysis/cal_plddt_dir.py -i ${output_dir}/scaffold_fasta --max-tokens-per-batch ${max_tokens}
# Calculate sc-TMscore
python src/byprot/utils/protein/evaluator_dplm2.py -cn unconditional_codesign \
inference.input_fasta_dir=${output_dir}/scaffold_fasta inference.calculate_diversity=false
```
For evaluation, users can use the `analysis/motif_analysis.ipynb` to obtain success rate of each problem.
## Representation Learning
The DPLM family excels in various downstream protein predictive tasks. DPLM is a superior protein sequence representation learner, while DPLM-2 can perform multimodal representation learning by leveraging both structure and sequence information, demonstrating its versatility and effectiveness. The following table summarizes the DPLM family performance, and the italic number means performance of DPLM-2, which offers structure-aware protein representations and outperforms sequence-based DPLM on most of the predictive tasks. Meanwhile, we also find the performance improves along with the model size.
| Models | Thermostability | HumanPPI | Metal Ion Binding | EC | GO-MF | GO-BP | GO-CC | DeepLoc-Subcellular | DeepLoc-Binary |
| --------------------- | --------------- | --------- | ----------------- | --------- | :-------: | :-------: | :-------: | ------------------- | -------------- |
| ESM2 (650M) | 0.691 | 84.78 | 71.88 | 0.866 | 0.676 | 0.344 | 0.402 | 83.68 | 92.28 |
| AR-LM | 0.638 | 68.48 | 61.66 | 0.691 | 0.566 | 0.258 | 0.287 | 68.53 | 88.31 |
| DPLM (150M) | 0.687 | 80.98 | 72.17 | 0.822 | 0.662 | 0.328 | 0.379 | 82.41 | 92.63 |
| DPLM (650M) | 0.695 | 86.41 | 75.15 | 0.875 | 0.680 | 0.357 | 0.409 | 84.56 | 93.09 |
| DPLM-2 (650M) | **_0.714_** | _84.44_ | _74.28_ | _0.878_ | _0.680_ | _0.359_ | _0.411_ | 82.98 | _93.64_ |
| *DPLM-2 (650M) | -- | _87.78_ | -- | --| --| -- | -- | _83.42_ | -- |
| DPLM (3B) | 0.704 | **90.00** | **75.94** | **0.883** | **0.687** | **0.369** | **0.463** | **85.32** | **93.93** |
> We find DPLM-2 demonstrates a performance degradation on some tasks (e.g., HumanPPI and DeepLoc-Subcellular), due to continued training on a smaller amount of structure data, which results in overfitting and degradation of the representations learned during large-scale sequence pretraining. \* means training on the larger-scale [AFDB representative](https://www.nature.com/articles/s41586-023-06510-w) structure data, and we find that enlarging structure data is indeed a key factor for better multimodal protein representations. Please refer to DPLM-2 paper for more details about this.
The training and evaluation pipeline is based on the [SaProt](https://github.com/westlake-repl/SaProt/tree/main) repository, and we slightly modify the code to support DPLM. Users can select the "representationlearning" branch for the evaluation of protein predictive tasks.
# Acknowledgements
DPLM extends its gratitude to the following projects and individuals.
We draw inspiration and leverages/modifies implementations from:
- [microsoft/evodiff](https://github.com/microsoft/evodiff) for the preprocessed UniRef50 dataset, sequence sampling evaluation implementation and data pipeline.
- [westlake-repl/SaProt](https://github.com/westlake-repl/SaProt/tree/main) for the representation learning evaluation pipeline.
- [jingraham/neurips19-graph-protein-design](https://github.com/jingraham/neurips19-graph-protein-design) for the preprocessed CATH dataset.
- [facebook/esm](https://github.com/facebookresearch/esm/) for their ESM implementations and pretrained model weights.
- [jasonkyuyim/se3_diffusion](https://github.com/jasonkyuyim/se3_diffusion) for their self-consistency structural evaluation implementation.
- [jasonkyuyim/multiflow](https://github.com/jasonkyuyim/multiflow) for their evaluation pipeline, structure data processing and preprocessed PDB dataset.
- [bjing2016/EigenFold](https://github.com/bjing2016/EigenFold) for the CAMEO 2022 dataset.
We express our sincere appreciation to the authors of these repositories for their invaluable contributions to the development of DPLM family.
# Citation
```
@inproceedings{wang2024dplm,
title={Diffusion Language Models Are Versatile Protein Learners},
author={Wang, Xinyou and Zheng, Zaixiang and Ye, Fei and Xue, Dongyu and Huang, Shujian and Gu, Quanquan},
booktitle={International Conference on Machine Learning},
year={2024}
}
@inproceedings{wang2025dplm2,
title={DPLM-2: A Multimodal Diffusion Protein Language Model},
author={Wang, Xinyou and Zheng, Zaixiang and Ye, Fei and Xue, Dongyu and Huang, Shujian and Gu, Quanquan},
booktitle={International Conference on Learning Representations},
year={2025}
}
@inproceedings{hsieh2025dplm2_1,
title={Elucidating the Design Space of Multimodal Protein Language Models},
author={Hsieh, Cheng-Yen and Wang, Xinyou and Zhang, Daiheng and Xue, Dongyu and Ye, Fei and Huang, Shujian and Zheng, Zaixiang and Gu, Quanquan},
booktitle={International Conference on Machine Learning},
year={2025}
}
```
================================================
FILE: analysis/TMalign.cpp
================================================
/* TM-align: sequence-independent structure alignment of monomer proteins by
* TM-score superposition. Please report issues to yangzhanglab@umich.edu
*
* References to cite:
* Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005)
*
* DISCLAIMER:
* Permission to use, copy, modify, and distribute the Software for any
* purpose, with or without fee, is hereby granted, provided that the
* notices on the head, the reference information, and this copyright
* notice appear in all copies or substantial portions of the Software.
* It is provided "as is" without express or implied warranty.
*
* ==========================
* How to install the program
* ==========================
* The following command compiles the program in your Linux computer:
*
* g++ -static -O3 -ffast-math -lm -o TMalign TMalign.cpp
*
* The '-static' flag should be removed on Mac OS, which does not support
* building static executables.
*
* ======================
* How to use the program
* ======================
* You can run the program without argument to obtain the document.
* Briefly, you can compare two structures by:
*
* ./TMalign structure1.pdb structure2.pdb
*
* ==============
* Update history
* ==============
* 2012/01/24: A C/C++ code of TM-align was constructed by Jianyi Yang
* 2016/05/21: Several updates of this program were made by Jianji Wu:
* (1) fixed several compiling bugs
* (2) made I/O of C/C++ version consistent with the Fortran version
* (3) added outputs including full-atom and ligand structures
* (4) added options of '-i', '-I' and '-m'
* 2016/05/25: Fixed a bug on PDB file reading
* 2018/06/04: Several updates were made by Chengxin Zhang, including
* (1) Fixed bug in reading PDB files with negative residue index,
* (2) Implemented the fTM-align algorithm (by the '-fast' option)
* as described in R Dong, S Pan, Z Peng, Y Zhang, J Yang
* (2018) Nucleic acids research. gky430.
* (3) Included option to perform TM-align against a whole
* folder of PDB files. A full list of options not available
* in the Fortran version can be explored by TMalign -h
* 2018/07/27: Added the -byresi option for TM-score superposition without
* re-alignment as in TMscore and TMscore -c
* 2018/08/07: Added the -dir option
* 2018/08/14: Added the -split option
* 2018/08/16: Added the -infmt1, -infmt2 options.
* 2019/01/07: Added support for PDBx/mmCIF format.
* 2019/02/09: Fixed asymmetric alignment bug.
* 2019/03/17: Added the -cp option for circular permutation
* 2019/07/23: Supported RasMol output by '-o' option
* 2019/07/24: Fixed bug on PyMOL format output by '-o' option with mmCIF input
* 2019/08/18: Fixed bug on RasMol format output file *_atm. Removed excessive
* circular permutation alignment by -cp
* 2019/08/20: Clarified PyMOL syntax.
* 2019/08/22: Added four additional PyMOL scripts.
* 2020/12/12: Fixed bug in double precision coordinate cif file alignment.
* 2021/02/24: Fixed file format issue for new incentive PyMOL.
* 2022/04/12: Compatible with AlphaFold CIF
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <malloc.h>
#include <sstream>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <vector>
#include <iterator>
#include <algorithm>
#include <string>
#include <map>
using namespace std;
/* Print the TM-align banner (program version, citation reference and
 * contact e-mail) to standard output. */
void print_version()
{
cout <<
"\n"
" *********************************************************************\n"
" * TM-align (Version 20220412): protein structure alignment *\n"
" * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005) *\n"
" * Please email comments and suggestions to yangzhanglab@umich.edu *\n"
" *********************************************************************"
<< endl;
}
/* Print the help text for the less frequently used command line options
 * (directory batch modes, chain splitting, output/input formats, -byresi,
 * -TMcut, -mirror, -het) to standard output.
 * Fixes in the help text: "seperate" -> "separate", and the garbled
 * sentence "TMcut is normalized is set by" -> "TMcut is normalized as
 * set by". */
void print_extra_help()
{
cout <<
"Additional options:\n"
" -dir Perform all-against-all alignment among the list of PDB\n"
" chains listed by 'chain_list' under 'chain_folder'. Note\n"
" that the slash is necessary.\n"
" $ TMalign -dir chain_folder/ chain_list\n"
"\n"
" -dir1 Use chain2 to search a list of PDB chains listed by 'chain1_list'\n"
" under 'chain1_folder'. Note that the slash is necessary.\n"
" $ TMalign -dir1 chain1_folder/ chain1_list chain2\n"
"\n"
" -dir2 Use chain1 to search a list of PDB chains listed by 'chain2_list'\n"
" under 'chain2_folder'\n"
" $ TMalign chain1 -dir2 chain2_folder/ chain2_list\n"
"\n"
" -suffix (Only when -dir1 and/or -dir2 are set, default is empty)\n"
" add file name suffix to files listed by chain1_list or chain2_list\n"
"\n"
" -atom 4-character atom name used to represent a residue.\n"
" Default is \" CA \" for proteins\n"
" (note the spaces before and after CA).\n"
"\n"
" -ter Strings to mark the end of a chain\n"
" 3: (default) TER, ENDMDL, END or different chain ID\n"
" 2: ENDMDL, END, or different chain ID\n"
" 1: ENDMDL or END\n"
" 0: (default in the first C++ TMalign) end of file\n"
"\n"
" -split Whether to split PDB file into multiple chains\n"
" 0: (default) treat the whole structure as one single chain\n"
" 1: treat each MODEL as a separate chain (-ter should be 0)\n"
" 2: treat each chain as a separate chain (-ter should be <=1)\n"
"\n"
" -outfmt Output format\n"
" 0: (default) full output\n"
" 1: fasta format compact output\n"
" 2: tabular format very compact output\n"
" -1: full output, but without version or citation information\n"
"\n"
" -byresi Whether to assume residue index correspondence between the\n"
" two structures.\n"
" 0: (default) sequence independent alignment\n"
" 1: (same as TMscore program) sequence-dependent superposition,\n"
" i.e. align by residue index\n"
" 2: (same as TMscore -c, should be used with -ter <=1)\n"
" align by residue index and chain ID\n"
" 3: (similar to TMscore -c, should be used with -ter <=1)\n"
" align by residue index and order of chain\n"
"\n"
" -TMcut -1: (default) do not consider TMcut\n"
" Values in [0.5,1): Do not proceed with TM-align for this\n"
" structure pair if TM-score is unlikely to reach TMcut.\n"
" TMcut is normalized as set by -a option:\n"
" -2: normalized by longer structure length\n"
" -1: normalized by shorter structure length\n"
" 0: (default, same as F) normalized by second structure\n"
" 1: same as T, normalized by average structure length\n"
"\n"
" -mirror Whether to align the mirror image of input structure\n"
" 0: (default) do not align mirrored structure\n"
" 1: align mirror of chain1 to origin chain2\n"
"\n"
" -het Whether to align residues marked as 'HETATM' in addition to 'ATOM '\n"
" 0: (default) only align 'ATOM ' residues\n"
" 1: align both 'ATOM ' and 'HETATM' residues\n"
"\n"
" -infmt1 Input format for chain1\n"
" -infmt2 Input format for chain2\n"
" -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
" 0: PDB format\n"
" 1: SPICKER format\n"
" 2: xyz format\n"
" 3: PDBx/mmCIF format\n"
<<endl;
}
/* Print the basic usage message (common options and example command
 * lines) to standard output; when h_opt is true the extra-option help
 * from print_extra_help() is appended. Always terminates the process
 * via exit(EXIT_SUCCESS), so this function never returns. */
void print_help(bool h_opt=false)
{
print_version();
cout <<
"\n"
"Usage: TMalign PDB1.pdb PDB2.pdb [Options]\n"
"\n"
"Options:\n"
" -u TM-score normalized by user assigned length (the same as -L)\n"
" warning: it should be >= minimum length of the two structures\n"
" otherwise, TM-score may be >1\n"
"\n"
" -a TM-score normalized by the average length of two structures\n"
" T or F, (default F)\n"
"\n"
" -i Start with an alignment specified in fasta file 'align.txt'\n"
"\n"
" -I Stick to the alignment specified in 'align.txt'\n"
"\n"
" -m Output TM-align rotation matrix\n"
"\n"
" -d TM-score scaled by an assigned d0, e.g. 5 Angstroms\n"
"\n"
" -o Output the superposition to 'TM_sup*'\n"
" $ TMalign PDB1.pdb PDB2.pdb -o TM_sup\n"
" View superposed C-alpha traces of aligned regions by RasMol or PyMOL:\n"
" $ rasmol -script TM_sup\n"
" $ pymol -d @TM_sup.pml\n"
" View superposed C-alpha traces of all regions:\n"
" $ rasmol -script TM_sup_all\n"
" $ pymol -d @TM_sup_all.pml\n"
" View superposed full-atom structures of aligned regions:\n"
" $ rasmol -script TM_sup_atm\n"
" $ pymol -d @TM_sup_atm.pml\n"
" View superposed full-atom structures of all regions:\n"
" $ rasmol -script TM_sup_all_atm\n"
" $ pymol -d @TM_sup_all_atm.pml\n"
" View superposed full-atom structures and ligands of all regions\n"
" $ rasmol -script TM_sup_all_atm_lig\n"
" $ pymol -d @TM_sup_all_atm_lig.pml\n"
"\n"
" -fast Fast but slightly inaccurate alignment by fTM-align algorithm\n"
"\n"
" -cp Alignment with circular permutation\n"
"\n"
" -v Print the version of TM-align\n"
"\n"
" -h Print the full help message, including additional options\n"
"\n"
" (Options -u, -a, -d, -o will not change the final structure alignment)\n\n"
"Example usages:\n"
" TMalign PDB1.pdb PDB2.pdb\n"
" TMalign PDB1.pdb PDB2.pdb -u 100 -d 5.0\n"
" TMalign PDB1.pdb PDB2.pdb -a T -o PDB1.sup\n"
" TMalign PDB1.pdb PDB2.pdb -i align.txt\n"
" TMalign PDB1.pdb PDB2.pdb -m matrix.txt\n"
" TMalign PDB1.pdb PDB2.pdb -fast\n"
" TMalign PDB1.pdb PDB2.pdb -cp\n"
<<endl;
if (h_opt) print_extra_help();
exit(EXIT_SUCCESS);
}
/* Functions for the core TMalign algorithm, including the entry function
* TMalign_main */
/* Print an error message to stdout and terminate the program with a
 * non-zero exit status. */
void PrintErrorAndQuit(const string sErrorString)
{
    cout << sErrorString << "\n";
    cout.flush(); // endl-equivalent: newline plus explicit flush
    exit(1);
}
/* Return the smaller of two values; on a tie the first argument wins. */
template <typename T> inline T getmin(const T &a, const T &b)
{
    if (b < a) return b;
    return a;
}
/* Allocate a Narray1 x Narray2 two-dimensional array on the heap and
 * return it through 'array'. Release it with DeleteArray. */
template <class A> void NewArray(A *** array, int Narray1, int Narray2)
{
    *array = new A* [Narray1];
    for (int r = 0; r < Narray1; r++)
        (*array)[r] = new A [Narray2];
}
/* Release a 2D array created by NewArray and reset the caller's pointer
 * to NULL. Rows that are already NULL are skipped. */
template <class A> void DeleteArray(A *** array, int Narray)
{
    for (int r = 0; r < Narray; r++)
    {
        if ((*array)[r]) delete [] (*array)[r];
    }
    if (Narray) delete [] (*array);
    (*array) = NULL;
}
/* Convert a one-letter amino acid code to the three-letter residue name.
 * Unrecognized codes map to "UNK". */
string AAmap(char A)
{
    switch (A)
    {
        case 'A': return "ALA"; case 'B': return "ASX";
        case 'C': return "CYS"; case 'D': return "ASP";
        case 'E': return "GLU"; case 'F': return "PHE";
        case 'G': return "GLY"; case 'H': return "HIS";
        case 'I': return "ILE"; case 'K': return "LYS";
        case 'L': return "LEU"; case 'M': return "MET";
        case 'N': return "ASN"; case 'O': return "PYL";
        case 'P': return "PRO"; case 'Q': return "GLN";
        case 'R': return "ARG"; case 'S': return "SER";
        case 'T': return "THR"; case 'U': return "SEC";
        case 'V': return "VAL"; case 'W': return "TRP";
        case 'Y': return "TYR"; case 'Z': return "GLX";
        default : return "UNK";
    }
}
/* Convert a three-letter residue name to its one-letter code.
 * D-amino acid names (DAL, DCY, ...) and a few common variants (MED,
 * selenomethionine MSE) map to the code of the corresponding standard
 * residue. Unrecognized names map to 'X'. */
char AAmap(const string &AA)
{
    // flat table of {three-letter name, one-letter code} pairs
    static const char *name_table[] = {
        "ALA","A", "DAL","A", "ASX","B", "CYS","C", "DCY","C",
        "ASP","D", "DAS","D", "GLU","E", "DGL","E", "PHE","F",
        "DPN","F", "GLY","G", "HIS","H", "DHI","H", "ILE","I",
        "DIL","I", "LYS","K", "DLY","K", "LEU","L", "DLE","L",
        "MET","M", "MED","M", "MSE","M", "ASN","N", "DSG","N",
        "PYL","O", "PRO","P", "DPR","P", "GLN","Q", "DGN","Q",
        "ARG","R", "DAR","R", "SER","S", "DSN","S", "THR","T",
        "DTH","T", "SEC","U", "VAL","V", "DVA","V", "TRP","W",
        "DTR","W", "TYR","Y", "DTY","Y", "GLX","Z"
    };
    const size_t n_entry = sizeof(name_table)/sizeof(name_table[0]);
    for (size_t k = 0; k < n_entry; k += 2)
        if (AA == name_table[k]) return name_table[k+1][0];
    return 'X';
}
/* Split 'line' on 'delimiter' and append the non-empty fields to
 * line_vec. Runs of consecutive delimiters act as one separator.
 * line      - input string
 * line_vec  - output vector of fields
 * delimiter - separator character (default: space) */
void split(const string &line, vector<string> &line_vec,
    const char delimiter=' ')
{
    size_t pos = 0;
    while (pos < line.size())
    {
        if (line[pos] == delimiter) { pos++; continue; }
        size_t start = pos;
        while (pos < line.size() && line[pos] != delimiter) pos++;
        line_vec.push_back(line.substr(start, pos - start));
    }
}
/* Strip whitespace (space, newline, carriage return, tab) from the
 * beginning and end of 'inputString'.
 * Note: a string consisting entirely of whitespace is returned
 * unchanged — this historical behavior is preserved because callers
 * (e.g. file2chainlist) rely on it.
 * Fix: the former code stored find_first_not_of()'s size_t result in an
 * int, relying on implementation-defined narrowing of string::npos;
 * npos is now compared directly. */
string Trim(const string &inputString)
{
    const char *whitespace = " \n\r\t";
    size_t idxBegin = inputString.find_first_not_of(whitespace);
    if (idxBegin == string::npos) return inputString; // all whitespace
    size_t idxEnd = inputString.find_last_not_of(whitespace);
    return inputString.substr(idxBegin, idxEnd + 1 - idxBegin);
}
/* Split 'line' on 'delimiter', returning both the fields (line_vec) and
 * the separating delimiter runs (white_vec), so the original line can be
 * reconstructed by interleaving the two vectors.
 * line      - input string
 * line_vec  - output vector of fields
 * white_vec - output vector of delimiter runs
 * delimiter - separator character (default: space)
 * Fix: when the line began with a delimiter, the former code called
 * white_vec.back() on an empty vector (undefined behavior); a leading
 * delimiter run is now recorded as the first element of white_vec. */
void split_white(const string &line, vector<string> &line_vec,
    vector<string>&white_vec, const char delimiter=' ')
{
    bool within_word = false;
    for (int pos=0;pos<line.size();pos++)
    {
        if (line[pos]==delimiter)
        {
            // start a new delimiter run when leaving a word, or when the
            // line begins with a delimiter (white_vec still empty)
            if (within_word==true || white_vec.size()==0)
            {
                white_vec.push_back("");
                within_word = false;
            }
            white_vec.back()+=delimiter;
        }
        else
        {
            if (within_word==false)
            {
                line_vec.push_back("");
                within_word = true;
            }
            line_vec.back()+=line[pos];
        }
    }
}
/* Read coordinate records from 'filename' into memory.
 *
 * PDB_lines    - output: one vector of ATOM/HETATM text records per entry
 *                (an entry is a model and/or a chain, depending on split_opt)
 * chainID_list - output: one identifier per entry, in the form ":model",
 *                ":chain" or ":model:chain" (blank chains become "_")
 * mol_vec      - output: per-entry counter, incremented for residue names
 *                that look like nucleic acids and decremented otherwise,
 *                so mol_vec[k]>0 suggests RNA/DNA and <=0 protein
 * ter_opt      - stop reading at END (>=1), chain change (>=2), TER (>=3)
 * infmt_opt    - -1: auto-detect PDB vs PDBx/mmCIF; 0: PDB; 1: SPICKER;
 *                2: xyz; 3: PDBx/mmCIF
 * atom_opt     - 4-character atom name to keep; "auto" selects " CA "
 * split_opt    - 0: single entry; 1: split by model; 2: split by chain
 * het_opt      - non-zero also keeps HETATM records
 * Returns the number of entries read (PDB_lines.size()). */
size_t get_PDB_lines(const string filename,
    vector<vector<string> >&PDB_lines, vector<string> &chainID_list,
    vector<int> &mol_vec, const int ter_opt, const int infmt_opt,
    const string atom_opt, const int split_opt, const int het_opt)
{
    size_t i=0; // resi i.e. atom index
    string line;
    char chainID=0;
    string resi="";
    bool select_atom=false;
    size_t model_idx=0;
    vector<string> tmp_str_vec;
    ifstream fin;
    fin.open(filename.c_str());
    if (infmt_opt==0||infmt_opt==-1) // PDB format
    {
        while (fin.good())
        {
            getline(fin, line);
            // auto-detection: a "loop_" keyword means the file is really
            // PDBx/mmCIF; restart parsing from scratch in mode 3
            if (infmt_opt==-1 && line.compare(0,5,"loop_")==0) // PDBx/mmCIF
                return get_PDB_lines(filename,PDB_lines,chainID_list,
                    mol_vec, ter_opt, 3, atom_opt, split_opt,het_opt);
            if (i > 0)
            {
                if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
                else if (ter_opt>=3 && line.compare(0,3,"TER")==0) break;
            }
            // when splitting, END resets the chain so a new entry starts
            if (split_opt && line.compare(0,3,"END")==0) chainID=0;
            // keep ATOM (or HETATM if requested) records long enough to
            // contain coordinates (>=54 columns), with a blank or 'A'
            // alternate-location indicator (column 17)
            if ((line.compare(0, 6, "ATOM  ")==0 ||
                (line.compare(0, 6, "HETATM")==0 && het_opt))
                && line.size()>=54 && (line[16]==' ' || line[16]=='A'))
            {
                if (atom_opt=="auto")
                    select_atom=(line.compare(12,4," CA ")==0);
                else select_atom=(line.compare(12,4,atom_opt)==0);
                if (select_atom)
                {
                    if (!chainID) // first selected atom of a new entry
                    {
                        chainID=line[21];
                        model_idx++;
                        stringstream i8_stream;
                        i=0;
                        if (split_opt==2) // split by chain
                        {
                            if (chainID==' ')
                            {
                                if (ter_opt>=1) i8_stream << ":_";
                                else i8_stream<<':'<<model_idx<<":_";
                            }
                            else
                            {
                                if (ter_opt>=1) i8_stream << ':' << chainID;
                                else i8_stream<<':'<<model_idx<<':'<<chainID;
                            }
                            chainID_list.push_back(i8_stream.str());
                        }
                        else if (split_opt==1) // split by model
                        {
                            i8_stream << ':' << model_idx;
                            chainID_list.push_back(i8_stream.str());
                        }
                        PDB_lines.push_back(tmp_str_vec);
                        mol_vec.push_back(0);
                    }
                    else if (ter_opt>=2 && chainID!=line[21]) break;
                    if (split_opt==2 && chainID!=line[21]) // chain changed
                    {
                        chainID=line[21];
                        i=0;
                        stringstream i8_stream;
                        if (chainID==' ')
                        {
                            if (ter_opt>=1) i8_stream << ":_";
                            else i8_stream<<':'<<model_idx<<":_";
                        }
                        else
                        {
                            if (ter_opt>=1) i8_stream << ':' << chainID;
                            else i8_stream<<':'<<model_idx<<':'<<chainID;
                        }
                        chainID_list.push_back(i8_stream.str());
                        PDB_lines.push_back(tmp_str_vec);
                        mol_vec.push_back(0);
                    }
                    if (resi==line.substr(22,5))
                        cerr<<"Warning! Duplicated residue "<<resi<<endl;
                    resi=line.substr(22,5); // including insertion code
                    PDB_lines.back().push_back(line);
                    // residue names of the form " D?" or "  ?" (columns
                    // 18-20) look like nucleotides: count them as such
                    if (line[17]==' ' && (line[18]=='D'||line[18]==' ')) mol_vec.back()++;
                    else mol_vec.back()--;
                    i++;
                }
            }
        }
    }
    else if (infmt_opt==1) // SPICKER format
    {
        int L=0;
        float x,y,z;
        stringstream i8_stream;
        while (fin.good())
        {
            // each model starts with a header line: length plus 3 numbers
            fin >>L>>x>>y>>z;
            getline(fin, line);
            if (!fin.good()) break;
            model_idx++;
            stringstream i8_stream;
            i8_stream << ':' << model_idx;
            chainID_list.push_back(i8_stream.str());
            PDB_lines.push_back(tmp_str_vec);
            mol_vec.push_back(0);
            for (i=0;i<L;i++)
            {
                // re-format each CA coordinate triple as a PDB ATOM record
                fin >>x>>y>>z;
                i8_stream<<"ATOM   "<<setw(4)<<i+1<<"  CA  UNK  "<<setw(4)
                    <<i+1<<"    "<<setiosflags(ios::fixed)<<setprecision(3)
                    <<setw(8)<<x<<setw(8)<<y<<setw(8)<<z;
                line=i8_stream.str();
                i8_stream.str(string());
                PDB_lines.back().push_back(line);
            }
            getline(fin, line);
        }
    }
    else if (infmt_opt==2) // xyz format
    {
        int L=0;
        char A;
        stringstream i8_stream;
        while (fin.good())
        {
            // first line: number of residues; second line: chain name
            getline(fin, line);
            L=atoi(line.c_str());
            getline(fin, line);
            for (i=0;i<line.size();i++)
                if (line[i]==' '||line[i]=='\t') break;
            if (!fin.good()) break;
            chainID_list.push_back(':'+line.substr(0,i));
            PDB_lines.push_back(tmp_str_vec);
            mol_vec.push_back(0);
            for (i=0;i<L;i++)
            {
                // one residue per line: one-letter code then x, y, z
                getline(fin, line);
                i8_stream<<"ATOM   "<<setw(4)<<i+1<<"  CA  "
                    <<AAmap(line[0])<<"  "<<setw(4)<<i+1<<"    "
                    <<line.substr(2,8)<<line.substr(11,8)<<line.substr(20,8);
                line=i8_stream.str();
                i8_stream.str(string());
                PDB_lines.back().push_back(line);
                // lowercase one-letter codes mark nucleotides
                if (line[0]>='a' && line[0]<='z') mol_vec.back()++; // RNA
                else mol_vec.back()--;
            }
        }
    }
    else if (infmt_opt==3) // PDBx/mmCIF format
    {
        bool loop_ = false; // not reading following content
        map<string,int> _atom_site; // _atom_site data item -> column index
        int atom_site_pos;
        vector<string> line_vec;
        string alt_id="."; // alternative location indicator
        string asym_id="."; // this is similar to chainID, except that
                            // chainID is char while asym_id is a string
                            // with possibly multiple char
        string prev_asym_id="";
        string AA=""; // residue name
        string atom="";
        string prev_resi="";
        string model_index=""; // the same as model_idx but type is string
        stringstream i8_stream;
        while (fin.good())
        {
            getline(fin, line);
            if (line.size()==0) continue;
            // a '#' (or '# ') line terminates the current loop_ section
            if (loop_) loop_ = (line.size()>=2)?(line.compare(0,2,"# ")):(line.compare(0,1,"#"));
            if (!loop_)
            {
                // look for the start of an _atom_site loop and record the
                // column position of every _atom_site data item
                if (line.compare(0,5,"loop_")) continue;
                while(1)
                {
                    if (fin.good()) getline(fin, line);
                    else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
                    if (line.size()) break;
                }
                if (line.compare(0,11,"_atom_site.")) continue;
                loop_=true;
                _atom_site.clear();
                atom_site_pos=0;
                _atom_site[Trim(line.substr(11))]=atom_site_pos;
                while(1)
                {
                    if (fin.good()) getline(fin, line);
                    else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
                    if (line.size()==0) continue;
                    if (line.compare(0,11,"_atom_site.")) break;
                    _atom_site[Trim(line.substr(11))]=++atom_site_pos;
                }
                // all of the following data items are required; abandon
                // the loop if any is missing
                if (_atom_site.count("group_PDB")*
                    _atom_site.count("label_atom_id")*
                    _atom_site.count("label_comp_id")*
                    (_atom_site.count("auth_asym_id")+
                    _atom_site.count("label_asym_id"))*
                    (_atom_site.count("auth_seq_id")+
                    _atom_site.count("label_seq_id"))*
                    _atom_site.count("Cartn_x")*
                    _atom_site.count("Cartn_y")*
                    _atom_site.count("Cartn_z")==0)
                {
                    loop_ = false;
                    cerr<<"Warning! Missing one of the following _atom_site data items: group_PDB, label_atom_id, label_atom_id, auth_asym_id/label_asym_id, auth_seq_id/label_seq_id, Cartn_x, Cartn_y, Cartn_z"<<endl;
                    continue;
                }
            }
            line_vec.clear();
            split(line,line_vec);
            if (line_vec[_atom_site["group_PDB"]]!="ATOM" && (het_opt==0 ||
                line_vec[_atom_site["group_PDB"]]!="HETATM")) continue;
            alt_id=".";
            if (_atom_site.count("label_alt_id")) // in 39.4 % of entries
                alt_id=line_vec[_atom_site["label_alt_id"]];
            if (alt_id!="." && alt_id!="A") continue;
            // strip quotes and pad the atom name to the 4-column PDB form
            atom=line_vec[_atom_site["label_atom_id"]];
            if (atom[0]=='"') atom=atom.substr(1);
            if (atom.size() && atom[atom.size()-1]=='"')
                atom=atom.substr(0,atom.size()-1);
            if (atom.size()==0) continue;
            if (atom.size()==1) atom=" "+atom+"  ";
            else if (atom.size()==2) atom=" "+atom+" "; // wrong for sidechain H
            else if (atom.size()==3) atom=" "+atom;
            else if (atom.size()>=5) continue;
            // pad the residue name to 3 columns
            AA=line_vec[_atom_site["label_comp_id"]]; // residue name
            if (AA.size()==1) AA="  "+AA;
            else if (AA.size()==2) AA=" " +AA;
            else if (AA.size()>=4) continue;
            if (atom_opt=="auto")
                select_atom=(atom==" CA ");
            else select_atom=(atom==atom_opt);
            if (!select_atom) continue;
            // prefer the author-assigned chain ID when available
            if (_atom_site.count("auth_asym_id"))
                asym_id=line_vec[_atom_site["auth_asym_id"]];
            else asym_id=line_vec[_atom_site["label_asym_id"]];
            if (asym_id==".") asym_id=" ";
            // model boundary: either stop (ter_opt>=1) or open a new entry
            if (_atom_site.count("pdbx_PDB_model_num") &&
                model_index!=line_vec[_atom_site["pdbx_PDB_model_num"]])
            {
                model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
                if (PDB_lines.size() && ter_opt>=1) break;
                if (PDB_lines.size()==0 || split_opt>=1)
                {
                    PDB_lines.push_back(tmp_str_vec);
                    mol_vec.push_back(0);
                    prev_asym_id=asym_id;
                    if (split_opt==1 && ter_opt==0) chainID_list.push_back(
                        ':'+model_index);
                    else if (split_opt==2 && ter_opt==0)
                        chainID_list.push_back(':'+model_index+':'+asym_id);
                    else if (split_opt==2 && ter_opt==1)
                        chainID_list.push_back(':'+asym_id);
                }
            }
            // chain boundary: either stop (ter_opt>=2) or open a new entry
            if (prev_asym_id!=asym_id)
            {
                if (prev_asym_id!="" && ter_opt>=2) break;
                if (split_opt>=2)
                {
                    PDB_lines.push_back(tmp_str_vec);
                    mol_vec.push_back(0);
                    if (split_opt==1 && ter_opt==0) chainID_list.push_back(
                        ':'+model_index);
                    else if (split_opt==2 && ter_opt==0)
                        chainID_list.push_back(':'+model_index+':'+asym_id);
                    else if (split_opt==2 && ter_opt==1)
                        chainID_list.push_back(':'+asym_id);
                }
            }
            if (prev_asym_id!=asym_id) prev_asym_id=asym_id;
            // residue names " D?" / "  ?" look like nucleotides
            if (AA[0]==' ' && (AA[1]=='D'||AA[1]==' ')) mol_vec.back()++;
            else mol_vec.back()--;
            // residue number, preferring the author-assigned value, with
            // the insertion code (or a blank) appended
            if (_atom_site.count("auth_seq_id"))
                resi=line_vec[_atom_site["auth_seq_id"]];
            else resi=line_vec[_atom_site["label_seq_id"]];
            if (_atom_site.count("pdbx_PDB_ins_code") &&
                line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
                resi+=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
            else resi+=" ";
            if (prev_resi==resi)
                cerr<<"Warning! Duplicated residue "<<resi<<endl;
            prev_resi=resi;
            i++;
            // re-format the mmCIF fields as a fixed-column PDB ATOM record
            i8_stream<<"ATOM  "
                <<setw(5)<<i<<" "<<atom<<" "<<AA<<" "<<asym_id[0]
                <<setw(5)<<resi.substr(0,5)<<"   "
                <<setw(8)<<line_vec[_atom_site["Cartn_x"]].substr(0,8)
                <<setw(8)<<line_vec[_atom_site["Cartn_y"]].substr(0,8)
                <<setw(8)<<line_vec[_atom_site["Cartn_z"]].substr(0,8);
            PDB_lines.back().push_back(i8_stream.str());
            i8_stream.str(string());
        }
        _atom_site.clear();
        line_vec.clear();
        alt_id.clear();
        asym_id.clear();
        AA.clear();
    }
    fin.close();
    line.clear();
    if (!split_opt) chainID_list.push_back("");
    return PDB_lines.size();
}
/* Read sequences from the FASTA file 'filename'.
 * FASTA_lines  - output: each entry holds one string, the sequence
 * chainID_list - output: ':'+header name when splitting, else ""
 * mol_vec      - output: per sequence, (#lowercase - #uppercase) letters
 * if ter_opt >=1, only read the first sequence.
 * if ter_opt ==0, read all sequences.
 * if split_opt >=1 and ter_opt ==0, each sequence is a separate entry.
 * if split_opt ==0 and ter_opt ==0, all sequences are combined into one.
 * Returns the number of entries stored in FASTA_lines.
 * Fix: sequence data appearing before the first '>' header is now
 * ignored; it previously triggered FASTA_lines.back() on an empty
 * vector, which was undefined behavior. */
size_t get_FASTA_lines(const string filename,
    vector<vector<string> >&FASTA_lines, vector<string> &chainID_list,
    vector<int> &mol_vec, const int ter_opt=3, const int split_opt=0)
{
    string line;
    vector<string> tmp_str_vec;
    int l;
    ifstream fin;
    fin.open(filename.c_str());
    while (fin.good())
    {
        getline(fin, line);
        if (line.size()==0 || line[0]=='#') continue; // skip blanks/comments
        if (line[0]=='>') // header line starts a new sequence
        {
            if (FASTA_lines.size())
            {
                if (ter_opt) break;         // only the first sequence wanted
                if (split_opt==0) continue; // concatenate into one entry
            }
            FASTA_lines.push_back(tmp_str_vec);
            FASTA_lines.back().push_back("");
            mol_vec.push_back(0);
            if (ter_opt==0 && split_opt)
            {
                line[0]=':'; // reuse the header text as a ":name" identifier
                chainID_list.push_back(line);
            }
            else chainID_list.push_back("");
        }
        else
        {
            // ignore stray sequence data before the first header
            if (FASTA_lines.size()==0) continue;
            FASTA_lines.back()[0]+=line;
            for (l=0;l<line.size();l++) mol_vec.back()+=
                ('a'<=line[l] && line[l]<='z')-('A'<=line[l] && line[l]<='Z');
        }
    }
    line.clear();
    fin.close();
    return FASTA_lines.size();
}
/* Extract a pairwise sequence alignment from residue index vectors,
 * assuming that "sequence" contains two empty strings.
 * Residues are paired when their index strings match; otherwise the
 * residue with the smaller residue number is emitted against a gap, so
 * both lists are consumed in residue-number order. Residues left over
 * after either list is exhausted are not emitted.
 * seqx, seqy          - one-letter sequences of structures 1 and 2
 * resi_vec1/resi_vec2 - per-residue index strings: 5 characters of
 *                       residue number + insertion code; for
 *                       byresi_opt>=2 the chain ID follows at index 5
 *                       (built in read_PDB)
 * byresi_opt          - <=2: residues match when the full index strings
 *                       are equal; ==3: the number part must match and
 *                       chains are paired by order of first appearance
 *                       rather than by chain ID letter
 * return length of alignment, including gap. */
int extract_aln_from_resi(vector<string> &sequence, char *seqx, char *seqy,
    const vector<string> resi_vec1, const vector<string> resi_vec2,
    const int byresi_opt)
{
    sequence.clear();
    sequence.push_back(""); // gapped copy of seqx
    sequence.push_back(""); // gapped copy of seqy
    int i1=0; // positions in resi_vec1
    int i2=0; // positions in resi_vec2
    int xlen=resi_vec1.size();
    int ylen=resi_vec2.size();
    // chain ID -> 1-based order of first appearance (byresi_opt==3 only)
    map<char,int> chainID_map1;
    map<char,int> chainID_map2;
    if (byresi_opt==3)
    {
        vector<char> chainID_vec;
        char chainID;
        int i;
        for (i=0;i<xlen;i++)
        {
            chainID=resi_vec1[i][5];
            if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
            {
                chainID_vec.push_back(chainID);
                chainID_map1[chainID]=chainID_vec.size();
            }
        }
        chainID_vec.clear();
        for (i=0;i<ylen;i++)
        {
            chainID=resi_vec2[i][5];
            if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
            {
                chainID_vec.push_back(chainID);
                chainID_map2[chainID]=chainID_vec.size();
            }
        }
        chainID_vec.clear();
    }
    // merge the two residue lists like a sorted-list merge
    while(i1<xlen && i2<ylen)
    {
        if ((byresi_opt<=2 && resi_vec1[i1]==resi_vec2[i2]) || (byresi_opt==3
            && resi_vec1[i1].substr(0,5)==resi_vec2[i2].substr(0,5)
            && chainID_map1[resi_vec1[i1][5]]==chainID_map2[resi_vec2[i2][5]]))
        {
            // residue indices agree: emit an aligned column
            sequence[0]+=seqx[i1++];
            sequence[1]+=seqy[i2++];
        }
        else if (atoi(resi_vec1[i1].substr(0,4).c_str())<=
            atoi(resi_vec2[i2].substr(0,4).c_str()))
        {
            // structure 1 lags behind: emit its residue against a gap
            sequence[0]+=seqx[i1++];
            sequence[1]+='-';
        }
        else
        {
            // structure 2 lags behind
            sequence[0]+='-';
            sequence[1]+=seqy[i2++];
        }
    }
    chainID_map1.clear();
    chainID_map2.clear();
    return sequence[0].size();
}
/* Parse fixed-column PDB ATOM records into coordinates and sequence.
 * a        - output: a[k][0..2] receive x,y,z of record k (columns 31-54)
 * seq      - output: NUL-terminated one-letter sequence (columns 18-20
 *            translated through AAmap)
 * resi_vec - output: for byresi_opt>=1, the residue number + insertion
 *            code (columns 23-27), with the chain ID appended when
 *            byresi_opt>=2
 * Returns the number of residues read. */
int read_PDB(const vector<string> &PDB_lines, double **a, char *seq,
    vector<string> &resi_vec, const int byresi_opt)
{
    int n;
    for (n=0;n<PDB_lines.size();n++)
    {
        const string &record=PDB_lines[n];
        a[n][0] = atof(record.substr(30, 8).c_str());
        a[n][1] = atof(record.substr(38, 8).c_str());
        a[n][2] = atof(record.substr(46, 8).c_str());
        seq[n] = AAmap(record.substr(17, 3));
        if (byresi_opt>=2)
            resi_vec.push_back(record.substr(22,5)+record[21]);
        else if (byresi_opt==1)
            resi_vec.push_back(record.substr(22,5));
    }
    seq[n]='\0';
    return n;
}
/* Squared Euclidean distance between two 3D points (no square root). */
double dist(double x[3], double y[3])
{
    double sum = 0;
    for (int k = 0; k < 3; k++)
    {
        double diff = x[k] - y[k];
        sum += diff * diff;
    }
    return sum;
}
/* Inner product of two 3-component vectors. */
double dot(double *a, double *b)
{
    double sum = a[0] * b[0];
    sum += a[1] * b[1];
    sum += a[2] * b[2];
    return sum;
}
/* Apply the rigid-body transform x1 = t + u*x, where u is a 3x3 rotation
 * matrix and t a translation vector. */
void transform(double t[3], double u[3][3], double *x, double *x1)
{
    for (int row = 0; row < 3; row++)
        x1[row] = t[row] + (u[row][0]*x[0] + u[row][1]*x[1] + u[row][2]*x[2]);
}
/* Apply the rigid-body transform (t,u) to 'len' points in x, writing the
 * transformed coordinates to x1. */
void do_rotation(double **x, double **x1, int len, double t[3], double u[3][3])
{
    for (int k = 0; k < len; k++)
    {
        const double *p = x[k];
        for (int row = 0; row < 3; row++)
            x1[k][row] = t[row] +
                (u[row][0]*p[0] + u[row][1]*p[1] + u[row][2]*p[2]);
    }
}
/* read user specified pairwise alignment from 'fname_lign' to 'sequence'.
* This function should only be called by main function, as it will
* terminate a program if wrong alignment is given */
void read_user_alignment(vector<string>&sequence, const string &fname_lign,
const int i_opt)
{
if (fname_lign == "")
PrintErrorAndQuit("Please provide a file name for option -i!");
// open alignment file
int n_p = 0;// number of structures in alignment file
string line;
ifstream fileIn(fname_lign.c_str());
if (fileIn.is_open())
{
while (fileIn.good())
{
getline(fileIn, line);
if (line.compare(0, 1, ">") == 0)// Flag for a new structure
{
if (n_p >= 2) break;
sequence.push_back("");
n_p++;
}
else if (n_p > 0 && line!="") sequence.back()+=line;
}
fileIn.close();
}
else PrintErrorAndQuit("ERROR! Alignment file does not exist.");
if (n_p < 2)
PrintErrorAndQuit("ERROR: Fasta format is wrong, two proteins should be included.");
if (sequence[0].size() != sequence[1].size())
PrintErrorAndQuit("ERROR! FASTA file is wrong. The length in alignment should be equal for the two aligned proteins.");
if (i_opt==3)
{
int aligned_resNum=0;
for (int i=0;i<sequence[0].size();i++)
aligned_resNum+=(sequence[0][i]!='-' && sequence[1][i]!='-');
if (aligned_resNum<3)
PrintErrorAndQuit("ERROR! Superposition is undefined for <3 aligned residues.");
}
line.clear();
return;
}
/* Read a list of entries from file 'name' into 'chain_list'.
 * Each non-blank line is trimmed and stored as dir_opt+entry+suffix_opt,
 * where dir_opt is the folder name (prefix) and suffix_opt the file name
 * extension (suffix). This function should only be called by the main
 * function, as it terminates the program when the file cannot be opened.
 * Fix: the error message is now passed as a string directly; the former
 * .c_str() call forced a needless C-string round-trip back into the
 * string parameter of PrintErrorAndQuit. */
void file2chainlist(vector<string>&chain_list, const string &name,
    const string &dir_opt, const string &suffix_opt)
{
    ifstream fp(name.c_str());
    if (! fp.is_open())
        PrintErrorAndQuit("Can not open file: "+name+'\n');
    string line;
    while (fp.good())
    {
        getline(fp, line);
        if (! line.size()) continue; // skip blank lines
        chain_list.push_back(dir_opt+Trim(line)+suffix_opt);
    }
    fp.close();
    line.clear();
}
/**************************************************************************
Implementation of the Kabsch algorithm for finding the best rotation matrix
---------------------------------------------------------------------------
x    - x(i,m) are coordinates of atom m in set x           (input)
y    - y(i,m) are coordinates of atom m in set y           (input)
n    - n is number of atom pairs                           (input)
mode - 0: calculate rms only                               (input)
       1: calculate u,t only                               (takes medium)
       2: calculate rms,u,t                                (takes longer)
rms  - sum of w*(ux+t-y)**2 over all atom pairs            (output)
u    - u(i,j) is rotation matrix for best superposition    (output)
t    - t(i) is translation vector for best superposition   (output)
Returns false when n<1, true otherwise.
**************************************************************************/
bool Kabsch(double **x, double **y, int n, int mode, double *rms,
    double t[3], double u[3][3])
{
    int i, j, m, m1, l, k;
    double e0, rms1, d, h, g;
    double cth, sth, sqrth, p, det, sigma;
    double xc[3], yc[3];
    double a[3][3], b[3][3], r[3][3], e[3], rr[6], ss[6];
    double sqrt3 = 1.73205080756888, tol = 0.01;
    int ip[] = { 0, 1, 3, 1, 2, 4, 3, 4, 5 };
    int ip2312[] = { 1, 2, 0, 1 };
    int a_failed = 0, b_failed = 0;
    double epsilon = 0.00000001;
    // initialization
    *rms = 0;
    rms1 = 0;
    e0 = 0;
    double c1[3], c2[3];
    double s1[3], s2[3];
    double sx[3], sy[3], sz[3];
    for (i = 0; i < 3; i++)
    {
        s1[i] = 0.0;
        s2[i] = 0.0;
        sx[i] = 0.0;
        sy[i] = 0.0;
        sz[i] = 0.0;
    }
    // u and a start out as identity matrices, r and t as zero
    for (i = 0; i<3; i++)
    {
        xc[i] = 0.0;
        yc[i] = 0.0;
        t[i] = 0.0;
        for (j = 0; j<3; j++)
        {
            u[i][j] = 0.0;
            r[i][j] = 0.0;
            a[i][j] = 0.0;
            if (i == j)
            {
                u[i][j] = 1.0;
                a[i][j] = 1.0;
            }
        }
    }
    if (n<1) return false;
    // accumulate coordinate sums (s1,s2) and cross sums (sx,sy,sz),
    // used below to build centroids and the correlation matrix
    for (i = 0; i<n; i++)
    {
        for (j = 0; j < 3; j++)
        {
            c1[j] = x[i][j];
            c2[j] = y[i][j];
            s1[j] += c1[j];
            s2[j] += c2[j];
        }
        for (j = 0; j < 3; j++)
        {
            sx[j] += c1[0] * c2[j];
            sy[j] += c1[1] * c2[j];
            sz[j] += c1[2] * c2[j];
        }
    }
    // centroids of both coordinate sets
    for (i = 0; i < 3; i++)
    {
        xc[i] = s1[i] / n;
        yc[i] = s2[i] / n;
    }
    // e0 = total squared deviation of both sets from their centroids;
    // only needed when an RMS value is requested
    if (mode == 2 || mode == 0)
        for (int mm = 0; mm < n; mm++)
            for (int nn = 0; nn < 3; nn++)
                e0 += (x[mm][nn] - xc[nn]) * (x[mm][nn] - xc[nn]) +
                    (y[mm][nn] - yc[nn]) * (y[mm][nn] - yc[nn]);
    // r = centered correlation matrix between the two sets
    for (j = 0; j < 3; j++)
    {
        r[j][0] = sx[j] - s1[0] * s2[j] / n;
        r[j][1] = sy[j] - s1[1] * s2[j] / n;
        r[j][2] = sz[j] - s1[2] * s2[j] / n;
    }
    // compute determinant of matrix r; its sign (kept in sigma) decides
    // whether the optimal superposition involves a reflection correction
    det = r[0][0] * (r[1][1] * r[2][2] - r[1][2] * r[2][1])\
        - r[0][1] * (r[1][0] * r[2][2] - r[1][2] * r[2][0])\
        + r[0][2] * (r[1][0] * r[2][1] - r[1][1] * r[2][0]);
    sigma = det;
    // compute trans(r)*r, stored as the packed upper triangle rr[0..5]
    m = 0;
    for (j = 0; j<3; j++)
    {
        for (i = 0; i <= j; i++)
        {
            rr[m] = r[0][i] * r[0][j] + r[1][i] * r[1][j] + r[2][i] * r[2][j];
            m++;
        }
    }
    // invariants of trans(r)*r for its characteristic cubic equation
    double spur = (rr[0] + rr[2] + rr[5]) / 3.0;
    double cof = (((((rr[2] * rr[5] - rr[4] * rr[4]) + rr[0] * rr[5])\
        - rr[3] * rr[3]) + rr[0] * rr[2]) - rr[1] * rr[1]) / 3.0;
    det = det*det;
    for (i = 0; i<3; i++) e[i] = spur;
    if (spur>0)
    {
        d = spur*spur;
        h = d - cof;
        g = (spur*cof - det) / 2.0 - spur*h;
        if (h>0)
        {
            // solve the cubic in trigonometric closed form; e[0..2] become
            // the eigenvalues of trans(r)*r in decreasing order
            sqrth = sqrt(h);
            d = h*h*h - g*g;
            if (d<0.0) d = 0.0;
            d = atan2(sqrt(d), -g) / 3.0;
            cth = sqrth * cos(d);
            sth = sqrth*sqrt3*sin(d);
            e[0] = (spur + cth) + cth;
            e[1] = (spur - cth) + sth;
            e[2] = (spur - cth) - sth;
            if (mode != 0)
            {//compute a
                // columns 0 and 2 of a: eigenvectors for e[0] and e[2],
                // taken from the adjoint of (trans(r)*r - e*I); the column
                // with the largest norm is the numerically safest choice
                for (l = 0; l<3; l = l + 2)
                {
                    d = e[l];
                    ss[0] = (d - rr[2]) * (d - rr[5]) - rr[4] * rr[4];
                    ss[1] = (d - rr[5]) * rr[1] + rr[3] * rr[4];
                    ss[2] = (d - rr[0]) * (d - rr[5]) - rr[3] * rr[3];
                    ss[3] = (d - rr[2]) * rr[3] + rr[1] * rr[4];
                    ss[4] = (d - rr[0]) * rr[4] + rr[1] * rr[3];
                    ss[5] = (d - rr[0]) * (d - rr[2]) - rr[1] * rr[1];
                    if (fabs(ss[0]) <= epsilon) ss[0] = 0.0;
                    if (fabs(ss[1]) <= epsilon) ss[1] = 0.0;
                    if (fabs(ss[2]) <= epsilon) ss[2] = 0.0;
                    if (fabs(ss[3]) <= epsilon) ss[3] = 0.0;
                    if (fabs(ss[4]) <= epsilon) ss[4] = 0.0;
                    if (fabs(ss[5]) <= epsilon) ss[5] = 0.0;
                    if (fabs(ss[0]) >= fabs(ss[2]))
                    {
                        j = 0;
                        if (fabs(ss[0]) < fabs(ss[5])) j = 2;
                    }
                    else if (fabs(ss[2]) >= fabs(ss[5])) j = 1;
                    else j = 2;
                    d = 0.0;
                    j = 3 * j;
                    for (i = 0; i<3; i++)
                    {
                        k = ip[i + j];
                        a[i][l] = ss[k];
                        d = d + ss[k] * ss[k];
                    }
                    // normalize the chosen eigenvector (zero it if degenerate)
                    //if( d > 0.0 ) d = 1.0 / sqrt(d);
                    if (d > epsilon) d = 1.0 / sqrt(d);
                    else d = 0.0;
                    for (i = 0; i<3; i++) a[i][l] = a[i][l] * d;
                }//for l
                // Gram-Schmidt: orthogonalize the column belonging to the
                // less separated eigenvalue (m1) against the other (m)
                d = a[0][0] * a[0][2] + a[1][0] * a[1][2] + a[2][0] * a[2][2];
                if ((e[0] - e[1]) >(e[1] - e[2]))
                {
                    m1 = 2;
                    m = 0;
                }
                else
                {
                    m1 = 0;
                    m = 2;
                }
                p = 0;
                for (i = 0; i<3; i++)
                {
                    a[i][m1] = a[i][m1] - d*a[i][m];
                    p = p + a[i][m1] * a[i][m1];
                }
                if (p <= tol)
                {
                    // nearly parallel columns: replace column m1 by a vector
                    // orthogonal to the dominant component of column m
                    p = 1.0;
                    for (i = 0; i<3; i++)
                    {
                        if (p < fabs(a[i][m])) continue;
                        p = fabs(a[i][m]);
                        j = i;
                    }
                    k = ip2312[j];
                    l = ip2312[j + 1];
                    p = sqrt(a[k][m] * a[k][m] + a[l][m] * a[l][m]);
                    if (p > tol)
                    {
                        a[j][m1] = 0.0;
                        a[k][m1] = -a[l][m] / p;
                        a[l][m1] = a[k][m] / p;
                    }
                    else a_failed = 1;
                }//if p<=tol
                else
                {
                    p = 1.0 / sqrt(p);
                    for (i = 0; i<3; i++) a[i][m1] = a[i][m1] * p;
                }//else p<=tol
                if (a_failed != 1)
                {
                    // middle column = cross product of the other two
                    a[0][1] = a[1][2] * a[2][0] - a[1][0] * a[2][2];
                    a[1][1] = a[2][2] * a[0][0] - a[2][0] * a[0][2];
                    a[2][1] = a[0][2] * a[1][0] - a[0][0] * a[1][2];
                }
            }//if(mode!=0)
        }//h>0
        //compute b anyway
        if (mode != 0 && a_failed != 1)//a is computed correctly
        {
            //compute b: column l of b is r*a_l, normalized
            for (l = 0; l<2; l++)
            {
                d = 0.0;
                for (i = 0; i<3; i++)
                {
                    b[i][l] = r[i][0] * a[0][l] +
                        r[i][1] * a[1][l] + r[i][2] * a[2][l];
                    d = d + b[i][l] * b[i][l];
                }
                //if( d > 0 ) d = 1.0 / sqrt(d);
                if (d > epsilon) d = 1.0 / sqrt(d);
                else d = 0.0;
                for (i = 0; i<3; i++) b[i][l] = b[i][l] * d;
            }
            // Gram-Schmidt: orthogonalize b[.][1] against b[.][0]
            d = b[0][0] * b[0][1] + b[1][0] * b[1][1] + b[2][0] * b[2][1];
            p = 0.0;
            for (i = 0; i<3; i++)
            {
                b[i][1] = b[i][1] - d*b[i][0];
                p += b[i][1] * b[i][1];
            }
            if (p <= tol)
            {
                // degenerate case, same fallback construction as for a
                p = 1.0;
                for (i = 0; i<3; i++)
                {
                    if (p<fabs(b[i][0])) continue;
                    p = fabs(b[i][0]);
                    j = i;
                }
                k = ip2312[j];
                l = ip2312[j + 1];
                p = sqrt(b[k][0] * b[k][0] + b[l][0] * b[l][0]);
                if (p > tol)
                {
                    b[j][1] = 0.0;
                    b[k][1] = -b[l][0] / p;
                    b[l][1] = b[k][0] / p;
                }
                else b_failed = 1;
            }//if( p <= tol )
            else
            {
                p = 1.0 / sqrt(p);
                for (i = 0; i<3; i++) b[i][1] = b[i][1] * p;
            }
            if (b_failed != 1)
            {
                // third column = cross product of the first two
                b[0][2] = b[1][0] * b[2][1] - b[1][1] * b[2][0];
                b[1][2] = b[2][0] * b[0][1] - b[2][1] * b[0][0];
                b[2][2] = b[0][0] * b[1][1] - b[0][1] * b[1][0];
                //compute u = b * trans(a), the optimal rotation
                for (i = 0; i<3; i++)
                    for (j = 0; j<3; j++)
                        u[i][j] = b[i][0] * a[j][0] +
                            b[i][1] * a[j][1] + b[i][2] * a[j][2];
            }
            //compute t = yc - u*xc, the optimal translation
            for (i = 0; i<3; i++)
                t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) -
                    u[i][2] * xc[2];
        }//if(mode!=0 && a_failed!=1)
    }//spur>0
    else //just compute t and errors
    {
        //compute t (u is still the identity here)
        for (i = 0; i<3; i++)
            t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) -
                u[i][2] * xc[2];
    }//else spur>0
    //compute rms from e0 and the singular values of r (sqrt of the
    //eigenvalues; the smallest is negated when det(r)<0, i.e. when the
    //best proper rotation must undo a reflection)
    for (i = 0; i<3; i++)
    {
        if (e[i] < 0) e[i] = 0;
        e[i] = sqrt(e[i]);
    }
    d = e[2];
    if (sigma < 0.0) d = -d;
    d = (d + e[1]) + e[0];
    if (mode == 2 || mode == 0)
    {
        rms1 = (e0 - d) - d;
        if (rms1 < 0.0) rms1 = 0.0;
    }
    *rms = rms1;
    return true;
}
/* Partial implementation of Needleman-Wunsch (NW) dynamic programming for
 * global alignment. The three NWDP_TM functions below are not a complete
 * implementation of the NW algorithm because gap jumping in the standard
 * Gotoh algorithm is not considered. Since the gap opening and gap
 * extension penalties are the same, this is not a problem. This code was
 * exploited in TM-align because it is about 1.5 times faster than a
 * complete NW implementation. Nevertheless, if gap opening != gap
 * extension shall be implemented in the future, the Gotoh algorithm must
 * be implemented. In rare scenarios, it is also possible to have an
 * asymmetric alignment (i.e. TMalign A.pdb B.pdb and TMalign B.pdb A.pdb
 * have different TM_A and TM_B values) caused by the NWDP_TM
 * implementation.
 */
/* Global alignment over a precomputed similarity matrix.
 * score[1:len1][1:len2] - pairwise similarity scores
 * gap_open              - penalty applied when a gap interrupts a match
 * path, val             - caller-supplied (len1+1)x(len2+1) work matrices;
 *                         path[i][j] records whether cell (i,j) was
 *                         reached from the diagonal
 * j2i[1:len2]           - output: j2i[j]=i when column j aligns to row i,
 *                         -1 when column j is unaligned */
void NWDP_TM(double **score, bool **path, double **val,
    int len1, int len2, double gap_open, int j2i[])
{
    int row, col;
    // boundary conditions: leading gaps are free
    for (row = 0; row <= len1; row++)
    {
        val[row][0] = 0;
        path[row][0] = false; // not from diagonal
    }
    for (col = 0; col <= len2; col++)
    {
        val[0][col] = 0;
        path[0][col] = false; // not from diagonal
        j2i[col] = -1;        // unaligned until traceback says otherwise
    }
    // fill the DP matrix
    for (row = 1; row <= len1; row++)
    {
        for (col = 1; col <= len2; col++)
        {
            double diag = val[row-1][col-1] + score[row][col];
            // gap in the second sequence; opening it after a match costs
            double up = val[row-1][col];
            if (path[row-1][col]) up += gap_open;
            // gap in the first sequence
            double left = val[row][col-1];
            if (path[row][col-1]) left += gap_open;
            if (diag >= up && diag >= left)
            {
                path[row][col] = true; // from diagonal
                val[row][col] = diag;
            }
            else
            {
                path[row][col] = false;
                val[row][col] = (left >= up) ? left : up;
            }
        }
    }
    // trace back from the bottom-right corner to extract the alignment
    row = len1;
    col = len2;
    while (row > 0 && col > 0)
    {
        if (path[row][col]) // from diagonal: aligned pair
        {
            j2i[col-1] = row-1;
            row--;
            col--;
        }
        else
        {
            double up = val[row-1][col];
            if (path[row-1][col]) up += gap_open;
            double left = val[row][col-1];
            if (path[row][col-1]) left += gap_open;
            if (left >= up) col--;
            else row--;
        }
    }
}
/* Global alignment where the match score between residues i and j is the
 * TM-score term 1/(1+dij/d02), with dij the squared distance (from dist)
 * between y[j] and x[i] superposed by the rotation u and translation t.
 * j2i[1:len2] - output: j2i[j]=i when column j aligns to row i, else -1.
 * path, val   - caller-supplied (len1+1)x(len2+1) work matrices. */
void NWDP_TM(bool **path, double **val, double **x, double **y,
    int len1, int len2, double t[3], double u[3][3],
    double d02, double gap_open, int j2i[])
{
    int row, col;
    // boundary conditions: leading gaps are free (kept identical to the
    // TMalign fortran version)
    for (row = 0; row <= len1; row++)
    {
        val[row][0] = 0;
        path[row][0] = false; // not from diagonal
    }
    for (col = 0; col <= len2; col++)
    {
        val[0][col] = 0;
        path[0][col] = false; // not from diagonal
        j2i[col] = -1;
    }
    double xx[3];
    for (row = 1; row <= len1; row++)
    {
        // superpose residue row-1 of x once per DP row
        transform(t, u, &x[row-1][0], xx);
        for (col = 1; col <= len2; col++)
        {
            double dij = dist(xx, &y[col-1][0]);
            double diag = val[row-1][col-1] + 1.0/(1+dij/d02);
            // gap in the second sequence
            double up = val[row-1][col];
            if (path[row-1][col]) up += gap_open;
            // gap in the first sequence
            double left = val[row][col-1];
            if (path[row][col-1]) left += gap_open;
            if (diag >= up && diag >= left)
            {
                path[row][col] = true; // from diagonal
                val[row][col] = diag;
            }
            else
            {
                path[row][col] = false;
                val[row][col] = (left >= up) ? left : up;
            }
        }
    }
    // trace back to extract the alignment
    row = len1;
    col = len2;
    while (row > 0 && col > 0)
    {
        if (path[row][col]) // aligned pair
        {
            j2i[col-1] = row-1;
            row--;
            col--;
        }
        else
        {
            double up = val[row-1][col];
            if (path[row-1][col]) up += gap_open;
            double left = val[row][col-1];
            if (path[row][col-1]) left += gap_open;
            if (left >= up) col--;
            else row--;
        }
    }
}
/* Same as the coordinate-based NWDP_TM, but without applying a rotation:
 * the match score uses the squared distance between x[i] and y[j] as-is.
 * j2i[1:len2] - output: j2i[j]=i when column j aligns to row i, else -1.
 * path, val   - caller-supplied (len1+1)x(len2+1) work matrices. */
void NWDP_SE(bool **path, double **val, double **x, double **y,
    int len1, int len2, double d02, double gap_open, int j2i[])
{
    int row, col;
    // boundary conditions: leading gaps are free
    for (row = 0; row <= len1; row++)
    {
        val[row][0] = 0;
        path[row][0] = false; // not from diagonal
    }
    for (col = 0; col <= len2; col++)
    {
        val[0][col] = 0;
        path[0][col] = false; // not from diagonal
        j2i[col] = -1;
    }
    // fill the DP matrix
    for (row = 1; row <= len1; row++)
    {
        for (col = 1; col <= len2; col++)
        {
            double dij = dist(&x[row-1][0], &y[col-1][0]);
            double diag = val[row-1][col-1] + 1.0/(1+dij/d02);
            // gap in the second sequence
            double up = val[row-1][col];
            if (path[row-1][col]) up += gap_open;
            // gap in the first sequence
            double left = val[row][col-1];
            if (path[row][col-1]) left += gap_open;
            if (diag >= up && diag >= left)
            {
                path[row][col] = true; // from diagonal
                val[row][col] = diag;
            }
            else
            {
                path[row][col] = false;
                val[row][col] = (left >= up) ? left : up;
            }
        }
    }
    // trace back to extract the alignment
    row = len1;
    col = len2;
    while (row > 0 && col > 0)
    {
        if (path[row][col]) // aligned pair
        {
            j2i[col-1] = row-1;
            row--;
            col--;
        }
        else
        {
            double up = val[row-1][col];
            if (path[row-1][col]) up += gap_open;
            double left = val[row][col-1];
            if (path[row][col-1]) left += gap_open;
            if (left >= up) col--;
            else row--;
        }
    }
}
/* +ss
 * Global alignment over secondary-structure strings: the match score is
 * 1 when secx[i] equals secy[j] and 0 otherwise.
 * j2i[1:len2] - output: j2i[j]=i when column j aligns to row i, else -1.
 * path, val   - caller-supplied (len1+1)x(len2+1) work matrices. */
void NWDP_TM(bool **path, double **val, const char *secx, const char *secy,
    const int len1, const int len2, const double gap_open, int j2i[])
{
    int row, col;
    // boundary conditions: leading gaps are free
    for (row = 0; row <= len1; row++)
    {
        val[row][0] = 0;
        path[row][0] = false; // not from diagonal
    }
    for (col = 0; col <= len2; col++)
    {
        val[0][col] = 0;
        path[0][col] = false; // not from diagonal
        j2i[col] = -1;
    }
    // fill the DP matrix
    for (row = 1; row <= len1; row++)
    {
        for (col = 1; col <= len2; col++)
        {
            // unit reward for identical secondary-structure states
            double diag = val[row-1][col-1] + 1.0*(secx[row-1]==secy[col-1]);
            // gap in the second sequence
            double up = val[row-1][col];
            if (path[row-1][col]) up += gap_open;
            // gap in the first sequence
            double left = val[row][col-1];
            if (path[row][col-1]) left += gap_open;
            if (diag >= up && diag >= left)
            {
                path[row][col] = true; // from diagonal
                val[row][col] = diag;
            }
            else
            {
                path[row][col] = false;
                val[row][col] = (left >= up) ? left : up;
            }
        }
    }
    // trace back to extract the alignment
    row = len1;
    col = len2;
    while (row > 0 && col > 0)
    {
        if (path[row][col]) // aligned pair
        {
            j2i[col-1] = row-1;
            row--;
            col--;
        }
        else
        {
            double up = val[row-1][col];
            if (path[row-1][col]) up += gap_open;
            double left = val[row][col-1];
            if (path[row][col-1]) left += gap_open;
            if (left >= up) col--;
            else row--;
        }
    }
}
/* Initialize the TM-score parameters used during the alignment search
 * stage: D0_MIN, Lnorm, d0, d0_search, score_d8 and dcu0 are all written
 * through references. */
void parameter_set4search(const int xlen, const int ylen,
    double &D0_MIN, double &Lnorm,
    double &score_d8, double &d0, double &d0_search, double &dcu0)
{
    D0_MIN = 0.5;
    dcu0 = 4.25; // update 3.85-->4.25
    // normalize the TM-score by the shorter chain during the search
    Lnorm = (ylen < xlen) ? ylen : xlen;
    if (Lnorm <= 19) d0 = 0.168; // update 15-->19 and 0.5-->0.168
    else d0 = (1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8);
    D0_MIN = d0 + 0.8; // this should be moved to above
    d0 = D0_MIN;       // update: best for search
    d0_search = d0;
    if (d0_search > 8) d0_search = 8;
    if (d0_search < 4.5) d0_search = 4.5;
    // pairs farther apart than score_d8 are dropped in search & final scoring
    score_d8 = 1.5*pow(Lnorm*1.0, 0.3) + 3.5;
}
void parameter_set4final_C3prime(const double len, double &D0_MIN,
    double &Lnorm, double &d0, double &d0_search)
{
    /* Final-scoring parameters for RNA (C3' representation). */
    D0_MIN = 0.3;
    Lnorm = len;   /* TM-score normalization length */

    /* stepwise d0 for very short chains, analytic formula otherwise */
    if      (Lnorm <= 11) d0 = 0.3;
    else if (Lnorm <= 15) d0 = 0.4;
    else if (Lnorm <= 19) d0 = 0.5;
    else if (Lnorm <= 23) d0 = 0.6;
    else if (Lnorm <  30) d0 = 0.7;
    else d0 = 0.6 * pow(Lnorm * 1.0 - 0.5, 1.0 / 2) - 2.5;

    /* clamp the search distance into [4.5, 8] */
    d0_search = d0;
    if (d0_search > 8) d0_search = 8;
    if (d0_search < 4.5) d0_search = 4.5;
}
void parameter_set4final(const double len, double &D0_MIN, double &Lnorm,
    double &d0, double &d0_search, const int mol_type)
{
    /* Final-scoring parameters; mol_type>0 dispatches to the RNA variant. */
    if (mol_type > 0) // RNA
    {
        parameter_set4final_C3prime(len, D0_MIN, Lnorm, d0, d0_search);
        return;
    }

    /* protein */
    D0_MIN = 0.5;
    Lnorm = len;   /* TM-score normalization length */
    d0 = (Lnorm <= 21) ? 0.5 : 1.24 * pow(Lnorm * 1.0 - 15, 1.0 / 3) - 1.8;
    if (d0 < D0_MIN) d0 = D0_MIN;

    /* clamp the search distance into [4.5, 8] */
    d0_search = d0;
    if (d0_search > 8) d0_search = 8;
    if (d0_search < 4.5) d0_search = 4.5;
}
void parameter_set4scale(const int len, const double d_s, double &Lnorm,
    double &d0, double &d0_search)
{
    /* User-scaled mode: the caller supplies d0 directly as d_s. */
    d0 = d_s;
    Lnorm = len;   /* TM-score normalization length */

    /* clamp the search distance into [4.5, 8] */
    d0_search = (d0 > 8) ? 8 : ((d0 < 4.5) ? 4.5 : d0);
}
// 1, collect those residues with dis<d;
// 2, calculate TMscore
int score_fun8( double **xa, double **ya, int n_ali, double d, int i_ali[],
    double *score1, int score_sum_method, const double Lnorm,
    const double score_d8, const double d0)
{
    /* Collect into i_ali the pairs with distance < d and accumulate the
     * TM-score sum (normalized by Lnorm into *score1).  If fewer than 3
     * pairs qualify, the threshold is relaxed by 0.5 per round.
     * Returns the number of collected pairs. */
    const double d02 = d0 * d0;
    const double d8_cut2 = score_d8 * score_d8;
    double cut2 = d * d;          /* squared inclusion threshold */
    double score_sum, di;
    int k, n_cut, relax = 0;

    for (;;)
    {
        n_cut = 0;
        score_sum = 0;
        for (k = 0; k < n_ali; k++)
        {
            di = dist(xa[k], ya[k]);   /* squared distance */
            if (di < cut2) i_ali[n_cut++] = k;
            /* method 8 only scores pairs closer than score_d8 */
            if (score_sum_method != 8 || di <= d8_cut2)
                score_sum += 1 / (1 + di / d02);
        }
        if (n_cut >= 3 || n_ali <= 3) break;
        /* too few feasible pairs: relax the threshold */
        relax++;
        cut2 = (d + relax * 0.5) * (d + relax * 0.5);
    }
    *score1 = score_sum / Lnorm;
    return n_cut;
}
int score_fun8_standard(double **xa, double **ya, int n_ali, double d,
    int i_ali[], double *score1, int score_sum_method,
    double score_d8, double d0)
{
    /* Same as score_fun8, except the score sum is normalized by the
     * alignment length n_ali instead of a caller-supplied Lnorm. */
    const double d02 = d0 * d0;
    const double d8_cut2 = score_d8 * score_d8;
    double cut2 = d * d;          /* squared inclusion threshold */
    double score_sum, di;
    int k, n_cut, relax = 0;

    for (;;)
    {
        n_cut = 0;
        score_sum = 0;
        for (k = 0; k < n_ali; k++)
        {
            di = dist(xa[k], ya[k]);   /* squared distance */
            if (di < cut2) i_ali[n_cut++] = k;
            /* method 8 only scores pairs closer than score_d8 */
            if (score_sum_method != 8 || di <= d8_cut2)
                score_sum += 1 / (1 + di / d02);
        }
        if (n_cut >= 3 || n_ali <= 3) break;
        /* too few feasible pairs: relax the threshold */
        relax++;
        cut2 = (d + relax * 0.5) * (d + relax * 0.5);
    }
    *score1 = score_sum / n_ali;
    return n_cut;
}
/* Refine the superposition of Lali pre-aligned coordinate pairs (xtm vs
 * ytm) and return the best TM-score found (normalized by Lnorm inside
 * score_fun8).  Fragments of decreasing length (Lali, Lali/2, ... down
 * to 4) are superposed with Kabsch, and each superposition is extended
 * for up to 20 rounds by re-fitting on the pairs currently within a
 * distance cutoff.  The best rotation/translation is written to u0/t0;
 * r1, r2 and xt are caller-provided scratch buffers.  *Rcomm is zeroed
 * whenever simplify_step != 1. */
double TMscore8_search(double **r1, double **r2, double **xtm, double **ytm,
    double **xt, int Lali, double t0[3], double u0[3][3], int simplify_step,
    int score_sum_method, double *Rcomm, double local_d0_search, double Lnorm,
    double score_d8, double d0)
{
    int i, m;
    double score_max, score, rmsd;
    const int kmax=Lali;
    int k_ali[kmax], ka, k;   // NOTE(review): variable-length array, a GNU extension
    double t[3];
    double u[3][3];
    double d;

    //iterative parameters
    int n_it=20;            //maximum number of iterations
    int n_init_max=6;       //maximum number of different fragment length
    int L_ini[n_init_max];  //fragment lengths, Lali, Lali/2, Lali/4 ... 4
    int L_ini_min=4;
    if(Lali<L_ini_min) L_ini_min=Lali;

    //build the list of fragment lengths by repeated halving
    int n_init=0, i_init;
    for(i=0; i<n_init_max-1; i++)
    {
        n_init++;
        L_ini[i]=(int) (Lali/pow(2.0, (double) i));
        if(L_ini[i]<=L_ini_min)
        {
            L_ini[i]=L_ini_min;
            break;
        }
    }
    if(i==n_init_max-1) //halving never reached L_ini_min: append it
    {
        n_init++;
        L_ini[i]=L_ini_min;
    }

    score_max=-1;
    //find the maximum score starting from local structures superposition
    int i_ali[kmax], n_cut;
    int L_frag; //fragment length
    int iL_max; //maximum starting postion for the fragment

    for(i_init=0; i_init<n_init; i_init++)
    {
        L_frag=L_ini[i_init];
        iL_max=Lali-L_frag;

        i=0;
        while(1)
        {
            //extract the fragment starting from position i
            ka=0;
            for(k=0; k<L_frag; k++)
            {
                int kk=k+i;
                r1[k][0]=xtm[kk][0];
                r1[k][1]=xtm[kk][1];
                r1[k][2]=xtm[kk][2];

                r2[k][0]=ytm[kk][0];
                r2[k][1]=ytm[kk][1];
                r2[k][2]=ytm[kk][2];

                k_ali[ka]=kk;
                ka++;
            }

            //extract rotation matrix based on the fragment
            Kabsch(r1, r2, L_frag, 1, &rmsd, t, u);
            if (simplify_step != 1)
                *Rcomm = 0;
            do_rotation(xtm, xt, Lali, t, u);

            //get subsegment of this fragment
            d = local_d0_search - 1;
            n_cut=score_fun8(xt, ytm, Lali, d, i_ali, &score,
                score_sum_method, Lnorm, score_d8, d0);
            if(score>score_max)
            {
                score_max=score;

                //save the rotation matrix
                for(k=0; k<3; k++)
                {
                    t0[k]=t[k];
                    u0[k][0]=u[k][0];
                    u0[k][1]=u[k][1];
                    u0[k][2]=u[k][2];
                }
            }

            //try to extend the alignment iteratively
            d = local_d0_search + 1;
            for(int it=0; it<n_it; it++)
            {
                //re-fit on the n_cut pairs that were within distance d
                ka=0;
                for(k=0; k<n_cut; k++)
                {
                    m=i_ali[k];
                    r1[k][0]=xtm[m][0];
                    r1[k][1]=xtm[m][1];
                    r1[k][2]=xtm[m][2];

                    r2[k][0]=ytm[m][0];
                    r2[k][1]=ytm[m][1];
                    r2[k][2]=ytm[m][2];

                    k_ali[ka]=m;
                    ka++;
                }
                //extract rotation matrix based on the fragment
                Kabsch(r1, r2, n_cut, 1, &rmsd, t, u);
                do_rotation(xtm, xt, Lali, t, u);
                n_cut=score_fun8(xt, ytm, Lali, d, i_ali, &score,
                    score_sum_method, Lnorm, score_d8, d0);
                if(score>score_max)
                {
                    score_max=score;

                    //save the rotation matrix
                    for(k=0; k<3; k++)
                    {
                        t0[k]=t[k];
                        u0[k][0]=u[k][0];
                        u0[k][1]=u[k][1];
                        u0[k][2]=u[k][2];
                    }
                }

                //check if it converges: the included pair set is unchanged
                if(n_cut==ka)
                {
                    for(k=0; k<n_cut; k++)
                    {
                        if(i_ali[k]!=k_ali[k]) break;
                    }
                    if(k==n_cut) break;
                }
            } //for iteration

            if(i<iL_max)
            {
                i=i+simplify_step; //shift the fragment
                if(i>iL_max) i=iL_max; //do this to use the last missed fragment
            }
            else if(i>=iL_max) break;
        }//while(1)
        //end of one fragment
    }//for(i_init
    return score_max;
}
/* Variant of TMscore8_search used for the standard TM-score ("-i" mode):
 * identical fragment/extension strategy, but scores are computed with
 * score_fun8_standard, which normalizes by the alignment length n_ali
 * rather than a caller-supplied Lnorm.  Writes the best rotation into
 * u0/t0 and returns the best score; *Rcomm is zeroed when
 * simplify_step != 1. */
double TMscore8_search_standard( double **r1, double **r2,
    double **xtm, double **ytm, double **xt, int Lali,
    double t0[3], double u0[3][3], int simplify_step, int score_sum_method,
    double *Rcomm, double local_d0_search, double score_d8, double d0)
{
    int i, m;
    double score_max, score, rmsd;
    const int kmax = Lali;
    int k_ali[kmax], ka, k;   // NOTE(review): variable-length array, a GNU extension
    double t[3];
    double u[3][3];
    double d;

    //iterative parameters
    int n_it = 20;            //maximum number of iterations
    int n_init_max = 6;       //maximum number of different fragment length
    int L_ini[n_init_max];    //fragment lengths, Lali, Lali/2, Lali/4 ... 4
    int L_ini_min = 4;
    if (Lali<L_ini_min) L_ini_min = Lali;

    //build the list of fragment lengths by repeated halving
    int n_init = 0, i_init;
    for (i = 0; i<n_init_max - 1; i++)
    {
        n_init++;
        L_ini[i] = (int)(Lali / pow(2.0, (double)i));
        if (L_ini[i] <= L_ini_min)
        {
            L_ini[i] = L_ini_min;
            break;
        }
    }
    if (i == n_init_max - 1) //halving never reached L_ini_min: append it
    {
        n_init++;
        L_ini[i] = L_ini_min;
    }

    score_max = -1;
    //find the maximum score starting from local structures superposition
    int i_ali[kmax], n_cut;
    int L_frag; //fragment length
    int iL_max; //maximum starting postion for the fragment

    for (i_init = 0; i_init<n_init; i_init++)
    {
        L_frag = L_ini[i_init];
        iL_max = Lali - L_frag;

        i = 0;
        while (1)
        {
            //extract the fragment starting from position i
            ka = 0;
            for (k = 0; k<L_frag; k++)
            {
                int kk = k + i;
                r1[k][0] = xtm[kk][0];
                r1[k][1] = xtm[kk][1];
                r1[k][2] = xtm[kk][2];

                r2[k][0] = ytm[kk][0];
                r2[k][1] = ytm[kk][1];
                r2[k][2] = ytm[kk][2];

                k_ali[ka] = kk;
                ka++;
            }
            //extract rotation matrix based on the fragment
            Kabsch(r1, r2, L_frag, 1, &rmsd, t, u);
            if (simplify_step != 1)
                *Rcomm = 0;
            do_rotation(xtm, xt, Lali, t, u);

            //get subsegment of this fragment
            d = local_d0_search - 1;
            n_cut = score_fun8_standard(xt, ytm, Lali, d, i_ali, &score,
                score_sum_method, score_d8, d0);

            if (score>score_max)
            {
                score_max = score;

                //save the rotation matrix
                for (k = 0; k<3; k++)
                {
                    t0[k] = t[k];
                    u0[k][0] = u[k][0];
                    u0[k][1] = u[k][1];
                    u0[k][2] = u[k][2];
                }
            }

            //try to extend the alignment iteratively
            d = local_d0_search + 1;
            for (int it = 0; it<n_it; it++)
            {
                //re-fit on the n_cut pairs that were within distance d
                ka = 0;
                for (k = 0; k<n_cut; k++)
                {
                    m = i_ali[k];
                    r1[k][0] = xtm[m][0];
                    r1[k][1] = xtm[m][1];
                    r1[k][2] = xtm[m][2];

                    r2[k][0] = ytm[m][0];
                    r2[k][1] = ytm[m][1];
                    r2[k][2] = ytm[m][2];

                    k_ali[ka] = m;
                    ka++;
                }
                //extract rotation matrix based on the fragment
                Kabsch(r1, r2, n_cut, 1, &rmsd, t, u);
                do_rotation(xtm, xt, Lali, t, u);
                n_cut = score_fun8_standard(xt, ytm, Lali, d, i_ali, &score,
                    score_sum_method, score_d8, d0);
                if (score>score_max)
                {
                    score_max = score;

                    //save the rotation matrix
                    for (k = 0; k<3; k++)
                    {
                        t0[k] = t[k];
                        u0[k][0] = u[k][0];
                        u0[k][1] = u[k][1];
                        u0[k][2] = u[k][2];
                    }
                }

                //check if it converges: the included pair set is unchanged
                if (n_cut == ka)
                {
                    for (k = 0; k<n_cut; k++)
                    {
                        if (i_ali[k] != k_ali[k]) break;
                    }
                    if (k == n_cut) break;
                }
            } //for iteration

            if (i<iL_max)
            {
                i = i + simplify_step; //shift the fragment
                if (i>iL_max) i = iL_max; //do this to use the last missed fragment
            }
            else if (i >= iL_max) break;
        }//while(1)
        //end of one fragment
    }//for(i_init
    return score_max;
}
//Comprehensive TMscore search engine
// input:   two vector sets: x, y
//          an alignment invmap0[] between x and y
//          simplify_step: 1 or 40 or other integers
//          score_sum_method: 0 for score over all pairs
//                            8 for score over the pairs with dist<score_d8
// output:  the best rotation matrix t, u that results in the highest TMscore
double detailed_search(double **r1, double **r2, double **xtm, double **ytm,
    double **xt, double **x, double **y, int xlen, int ylen,
    int invmap0[], double t[3], double u[3][3], int simplify_step,
    int score_sum_method, double local_d0_search, double Lnorm,
    double score_d8, double d0)
{
    /* Pack the aligned pairs of invmap0 into xtm/ytm, then run the
     * detailed TM-score search on the packed coordinates.
     * x is the model, y the template; x is superposed onto y. */
    int n_pair = 0;
    for (int j = 0; j < ylen; j++)
    {
        const int i = invmap0[j];
        if (i < 0) continue;    /* position j of y is unaligned */
        xtm[n_pair][0] = x[i][0];
        xtm[n_pair][1] = x[i][1];
        xtm[n_pair][2] = x[i][2];
        ytm[n_pair][0] = y[j][0];
        ytm[n_pair][1] = y[j][1];
        ytm[n_pair][2] = y[j][2];
        n_pair++;
    }

    //detailed search 40-->1
    double rmsd;
    return TMscore8_search(r1, r2, xtm, ytm, xt, n_pair, t, u, simplify_step,
        score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
}
double detailed_search_standard( double **r1, double **r2,
    double **xtm, double **ytm, double **xt, double **x, double **y,
    int xlen, int ylen, int invmap0[], double t[3], double u[3][3],
    int simplify_step, int score_sum_method, double local_d0_search,
    const bool& bNormalize, double Lnorm, double score_d8, double d0)
{
    /* Pack the aligned pairs of invmap0 into xtm/ytm and run the standard
     * TM-score search (x is the model, y the template).  With "-i"
     * (bNormalize==true) the score is rescaled from the internal n_ali
     * normalization to Lnorm. */
    int n_pair = 0;
    for (int j = 0; j < ylen; j++)
    {
        const int i = invmap0[j];
        if (i < 0) continue;    /* position j of y is unaligned */
        xtm[n_pair][0] = x[i][0];
        xtm[n_pair][1] = x[i][1];
        xtm[n_pair][2] = x[i][2];
        ytm[n_pair][0] = y[j][0];
        ytm[n_pair][1] = y[j][1];
        ytm[n_pair][2] = y[j][2];
        n_pair++;
    }

    //detailed search 40-->1
    double rmsd;
    double tmscore = TMscore8_search_standard(r1, r2, xtm, ytm, xt, n_pair,
        t, u, simplify_step, score_sum_method, &rmsd, local_d0_search,
        score_d8, d0);
    if (bNormalize) // "-i": renormalize the n_ali-based score by Lnorm
        tmscore = tmscore * n_pair / Lnorm;
    return tmscore;
}
//compute the score quickly in three iterations
/* Quickly evaluate the alignment invmap in at most three Kabsch/score
 * rounds: (1) fit on all aligned pairs, (2) refit on pairs within
 * d0_search, (3) refit on pairs within d0_search+1.  Returns the best
 * raw TM-score sum of the three rounds (NOT normalized); it is only used
 * to rank candidate initial alignments.  Terminates the program via
 * PrintErrorAndQuit if invmap contains a value below -1. */
double get_score_fast( double **r1, double **r2, double **xtm, double **ytm,
    double **x, double **y, int xlen, int ylen, int invmap[],
    double d0, double d0_search, double t[3], double u[3][3])
{
    double rms, tmscore, tmscore1, tmscore2;
    int i, j, k;

    //collect the aligned pairs into r1/r2 (Kabsch input) and xtm/ytm
    k=0;
    for(j=0; j<ylen; j++)
    {
        i=invmap[j];
        if(i>=0)
        {
            r1[k][0]=x[i][0];
            r1[k][1]=x[i][1];
            r1[k][2]=x[i][2];

            r2[k][0]=y[j][0];
            r2[k][1]=y[j][1];
            r2[k][2]=y[j][2];

            xtm[k][0]=x[i][0];
            xtm[k][1]=x[i][1];
            xtm[k][2]=x[i][2];

            ytm[k][0]=y[j][0];
            ytm[k][1]=y[j][1];
            ytm[k][2]=y[j][2];

            k++;
        }
        else if(i!=-1) PrintErrorAndQuit("Wrong map!\n");
    }
    Kabsch(r1, r2, k, 1, &rms, t, u);

    //evaluate score
    double di;
    const int len=k;
    double dis[len];   // NOTE(review): variable-length array, a GNU extension
    double d00=d0_search;
    double d002=d00*d00;
    double d02=d0*d0;

    int n_ali=k;
    double xrot[3];
    tmscore=0;
    for(k=0; k<n_ali; k++)
    {
        transform(t, u, &xtm[k][0], xrot);
        di=dist(xrot, &ytm[k][0]);   //squared distance under the first fit
        dis[k]=di;
        tmscore += 1/(1+di/d02);
    }

    //second iteration: refit on the pairs within d002t
    double d002t=d002;
    while(1)
    {
        j=0;
        for(k=0; k<n_ali; k++)
        {
            if(dis[k]<=d002t)
            {
                r1[j][0]=xtm[k][0];
                r1[j][1]=xtm[k][1];
                r1[j][2]=xtm[k][2];

                r2[j][0]=ytm[k][0];
                r2[j][1]=ytm[k][1];
                r2[j][2]=ytm[k][2];

                j++;
            }
        }
        //there are not enough feasible pairs, relieve the threshold
        if(j<3 && n_ali>3) d002t += 0.5;
        else break;
    }

    if(n_ali!=j)   //the cutoff actually excluded some pairs: refine twice
    {
        Kabsch(r1, r2, j, 1, &rms, t, u);
        tmscore1=0;
        for(k=0; k<n_ali; k++)
        {
            transform(t, u, &xtm[k][0], xrot);
            di=dist(xrot, &ytm[k][0]);
            dis[k]=di;
            tmscore1 += 1/(1+di/d02);
        }

        //third iteration: slightly wider cutoff
        d002t=d002+1;
        while(1)
        {
            j=0;
            for(k=0; k<n_ali; k++)
            {
                if(dis[k]<=d002t)
                {
                    r1[j][0]=xtm[k][0];
                    r1[j][1]=xtm[k][1];
                    r1[j][2]=xtm[k][2];

                    r2[j][0]=ytm[k][0];
                    r2[j][1]=ytm[k][1];
                    r2[j][2]=ytm[k][2];

                    j++;
                }
            }
            //there are not enough feasible pairs, relieve the threshold
            if(j<3 && n_ali>3) d002t += 0.5;
            else break;
        }

        //evaluate the score
        Kabsch(r1, r2, j, 1, &rms, t, u);
        tmscore2=0;
        for(k=0; k<n_ali; k++)
        {
            transform(t, u, &xtm[k][0], xrot);
            di=dist(xrot, &ytm[k][0]);
            tmscore2 += 1/(1+di/d02);
        }
    }
    else   //all pairs already within the cutoff: nothing to refine
    {
        tmscore1=tmscore;
        tmscore2=tmscore;
    }

    //keep the best of the three rounds
    if(tmscore1>=tmscore) tmscore=tmscore1;
    if(tmscore2>=tmscore) tmscore=tmscore2;
    return tmscore; // no need to normalize this score because it will not be used for latter scoring
}
//perform gapless threading to find the best initial alignment
//input: x, y, xlen, ylen
//output: y2x0 stores the best alignment: e.g.,
//y2x0[j]=i means:
//the jth element in y is aligned to the ith element in x if i>=0
//the jth element in y is aligned to a gap in x if i==-1
double get_initial(double **r1, double **r2, double **xtm, double **ytm,
    double **x, double **y, int xlen, int ylen, int *y2x,
    double d0, double d0_search, const bool fast_opt,
    double t[3], double u[3][3])
{
    /* Gapless threading: slide y along x at every feasible offset k,
     * score each diagonal alignment quickly, and keep the best map in
     * y2x (y2x[j]=i pairs residues; -1 marks a gap). */
    const int min_len = getmin(xlen, ylen);
    if (min_len < 3) PrintErrorAndQuit("Sequence is too short <3!\n");

    int min_ali = min_len / 2;   /* smallest overlap worth considering */
    if (min_ali <= 5) min_ali = 5;
    const int k_lo = -ylen + min_ali;
    const int k_hi = xlen - min_ali;
    const int step = fast_opt ? 5 : 1;   /* coarser scan in fast mode */

    double tmscore_max = -1;
    int k_best = k_lo;
    for (int k = k_lo; k <= k_hi; k += step)
    {
        /* build the diagonal map at offset k */
        for (int j = 0; j < ylen; j++)
        {
            const int i = j + k;
            y2x[j] = (i >= 0 && i < xlen) ? i : -1;
        }

        /* approximate TM-score; only used to rank initial alignments */
        const double tmscore = get_score_fast(r1, r2, xtm, ytm,
            x, y, xlen, ylen, y2x, d0, d0_search, t, u);
        if (tmscore >= tmscore_max)
        {
            tmscore_max = tmscore;
            k_best = k;
        }
    }

    /* regenerate the winning map */
    for (int j = 0; j < ylen; j++)
    {
        const int i = j + k_best;
        y2x[j] = (i >= 0 && i < xlen) ? i : -1;
    }
    return tmscore_max;
}
void smooth(int *sec, int len)
{
    /* Post-process a raw secondary-structure state array (1=coil,
     * 2=helix, 4=strand) by removing isolated assignments and bridging
     * one-residue breaks. */
    int i, s;

    /* single isolated state: --x-- => ----- */
    for (i = 2; i < len - 2; i++)
    {
        s = sec[i];
        if (s != 2 && s != 4) continue;
        if (sec[i - 2] != s && sec[i - 1] != s && sec[i + 1] != s
            && sec[i + 2] != s)
            sec[i] = 1;
    }

    /* isolated pair: --xx-- => ------ (helix, then strand) */
    for (i = 0; i < len - 5; i++)
    {
        if (sec[i] != 2 && sec[i + 1] != 2 && sec[i + 2] == 2
            && sec[i + 3] == 2 && sec[i + 4] != 2 && sec[i + 5] != 2)
        {
            sec[i + 2] = 1;
            sec[i + 3] = 1;
        }
        if (sec[i] != 4 && sec[i + 1] != 4 && sec[i + 2] == 4
            && sec[i + 3] == 4 && sec[i + 4] != 4 && sec[i + 5] != 4)
        {
            sec[i + 2] = 1;
            sec[i + 3] = 1;
        }
    }

    /* bridge one-residue breaks: x-x => xxx */
    for (i = 0; i < len - 2; i++)
    {
        if (sec[i] == 2 && sec[i + 1] != 2 && sec[i + 2] == 2)
            sec[i + 1] = 2;
        else if (sec[i] == 4 && sec[i + 1] != 4 && sec[i + 2] == 4)
            sec[i + 1] = 4;
    }
}
char sec_str(double dis13, double dis14, double dis15,
    double dis24, double dis25, double dis35)
{
    /* Assign a secondary-structure state from the six pairwise distances
     * of a 5-residue window: 'H' helix, 'E' strand, 'T' turn, 'C' coil. */
    double delta = 2.1;   /* tolerance around ideal helix distances */
    if (fabs(dis15 - 6.37) < delta && fabs(dis14 - 5.18) < delta &&
        fabs(dis25 - 5.18) < delta && fabs(dis13 - 5.45) < delta &&
        fabs(dis24 - 5.45) < delta && fabs(dis35 - 5.45) < delta)
        return 'H';   //helix

    delta = 1.42;         /* tolerance around ideal strand distances */
    if (fabs(dis15 - 13) < delta && fabs(dis14 - 10.4) < delta &&
        fabs(dis25 - 10.4) < delta && fabs(dis13 - 6.1) < delta &&
        fabs(dis24 - 6.1) < delta && fabs(dis35 - 6.1) < delta)
        return 'E';   //strand

    if (dis15 < 8) return 'T';   //turn: tight i..i+4 distance
    return 'C';   //coil
}
/* secondary structure assignment for protein:
 * 1->coil, 2->helix, 3->turn, 4->strand */
void make_sec(double **x, int len, char *sec)
{
    /* Assign a secondary-structure character to each residue from the six
     * pairwise distances of its centered 5-residue window; residues too
     * close to either terminus stay 'C'.  sec is NUL-terminated. */
    for (int i = 0; i < len; i++)
    {
        sec[i] = 'C';
        if (i - 2 < 0 || i + 2 >= len) continue;   /* window out of range */
        const double d13 = sqrt(dist(x[i - 2], x[i]));
        const double d14 = sqrt(dist(x[i - 2], x[i + 1]));
        const double d15 = sqrt(dist(x[i - 2], x[i + 2]));
        const double d24 = sqrt(dist(x[i - 1], x[i + 1]));
        const double d25 = sqrt(dist(x[i - 1], x[i + 2]));
        const double d35 = sqrt(dist(x[i], x[i + 2]));
        sec[i] = sec_str(d13, d14, d15, d24, d25, d35);
    }
    sec[len] = 0;
}
//get initial alignment from secondary structure alignment
//input: x, y, xlen, ylen
//output: y2x stores the best alignment: e.g.,
//y2x[j]=i means:
//the jth element in y is aligned to the ith element in x if i>=0
//the jth element in y is aligned to a gap in x if i==-1
void get_initial_ss(bool **path, double **val,
    const char *secx, const char *secy, int xlen, int ylen, int *y2x)
{
    /* Initial alignment from secondary-structure strings via NW dynamic
     * programming with a gap-opening penalty of -1.0. */
    NWDP_TM(path, val, secx, secy, xlen, ylen, -1.0, y2x);
}
// get_initial5 in TMalign fortran, get_initial_local in TMalign c by yangji
//get initial alignment of local structure superposition
//input: x, y, xlen, ylen
//output: y2x stores the best alignment: e.g.,
//y2x[j]=i means:
//the jth element in y is aligned to the ith element in x if i>=0
//the jth element in y is aligned to a gap in x if i==-1
/* Initial alignment from local fragment superposition (get_initial5 of
 * the FORTRAN TM-align).  Fragments of length 20 and 100 are superposed
 * at strided positions of both chains; each superposition seeds a
 * distance-based NW alignment which is scored with get_score_fast.
 * The best map is stored in y2x.  Returns true iff any candidate beat
 * the initial GLmax of 0. */
bool get_initial5( double **r1, double **r2, double **xtm, double **ytm,
    bool **path, double **val,
    double **x, double **y, int xlen, int ylen, int *y2x,
    double d0, double d0_search, const bool fast_opt, const double D0_MIN)
{
    double GL, rmsd;
    double t[3];
    double u[3][3];
    double d01 = d0 + 1.5;
    if (d01 < D0_MIN) d01 = D0_MIN;
    double d02 = d01*d01;

    double GLmax = 0;
    int aL = getmin(xlen, ylen);
    int *invmap = new int[ylen + 1];

    // jump on sequence1-------------->
    //stride between fragment start positions on x, scaled by chain length
    int n_jump1 = 0;
    if (xlen > 250)
        n_jump1 = 45;
    else if (xlen > 200)
        n_jump1 = 35;
    else if (xlen > 150)
        n_jump1 = 25;
    else
        n_jump1 = 15;
    if (n_jump1 > (xlen / 3))
        n_jump1 = xlen / 3;

    // jump on sequence2-------------->
    //stride between fragment start positions on y
    int n_jump2 = 0;
    if (ylen > 250)
        n_jump2 = 45;
    else if (ylen > 200)
        n_jump2 = 35;
    else if (ylen > 150)
        n_jump2 = 25;
    else
        n_jump2 = 15;
    if (n_jump2 > (ylen / 3))
        n_jump2 = ylen / 3;

    // fragment to superimpose-------------->
    //two fragment lengths are tried, capped by the shorter chain
    int n_frag[2] = { 20, 100 };
    if (n_frag[0] > (aL / 3))
        n_frag[0] = aL / 3;
    if (n_frag[1] > (aL / 2))
        n_frag[1] = aL / 2;

    // start superimpose search-------------->
    if (fast_opt)   //fast mode: 5x coarser strides
    {
        n_jump1*=5;
        n_jump2*=5;
    }
    bool flag = false;
    for (int i_frag = 0; i_frag < 2; i_frag++)
    {
        int m1 = xlen - n_frag[i_frag] + 1;
        int m2 = ylen - n_frag[i_frag] + 1;

        for (int i = 0; i<m1; i = i + n_jump1) //index starts from 0, different from FORTRAN
        {
            for (int j = 0; j<m2; j = j + n_jump2)
            {
                for (int k = 0; k<n_frag[i_frag]; k++) //fragment in y
                {
                    r1[k][0] = x[k + i][0];
                    r1[k][1] = x[k + i][1];
                    r1[k][2] = x[k + i][2];

                    r2[k][0] = y[k + j][0];
                    r2[k][1] = y[k + j][1];
                    r2[k][2] = y[k + j][2];
                }

                // superpose the two structures and rotate it
                Kabsch(r1, r2, n_frag[i_frag], 1, &rmsd, t, u);

                //align the full chains under this superposition, then score
                double gap_open = 0.0;
                NWDP_TM(path, val, x, y, xlen, ylen,
                    t, u, d02, gap_open, invmap);
                GL = get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen,
                    invmap, d0, d0_search, t, u);
                if (GL>GLmax)
                {
                    GLmax = GL;
                    for (int ii = 0; ii<ylen; ii++) y2x[ii] = invmap[ii];
                    flag = true;
                }
            }
        }
    }

    delete[] invmap;
    return flag;
}
void score_matrix_rmsd_sec( double **r1, double **r2, double **score,
    const char *secx, const char *secy, double **x, double **y,
    int xlen, int ylen, int *y2x, const double D0_MIN, double d0)
{
    /* Build a 1-indexed DP score matrix that combines superposition
     * distance (from the Kabsch fit of the current alignment y2x) with a
     * +0.5 bonus for matching secondary structure. */
    double t[3], u[3][3];
    double rmsd;
    double d01 = d0 + 1.5;
    if (d01 < D0_MIN) d01 = D0_MIN;
    const double d02 = d01 * d01;

    /* gather the currently aligned pairs and fit them */
    int n_pair = 0;
    for (int j = 0; j < ylen; j++)
    {
        const int i = y2x[j];
        if (i < 0) continue;
        r1[n_pair][0] = x[i][0];
        r1[n_pair][1] = x[i][1];
        r1[n_pair][2] = x[i][2];
        r2[n_pair][0] = y[j][0];
        r2[n_pair][1] = y[j][1];
        r2[n_pair][2] = y[j][2];
        n_pair++;
    }
    Kabsch(r1, r2, n_pair, 1, &rmsd, t, u);

    /* score every (i,j) pair under the fitted superposition */
    double xx[3];
    for (int ii = 0; ii < xlen; ii++)
    {
        transform(t, u, &x[ii][0], xx);
        for (int jj = 0; jj < ylen; jj++)
        {
            const double dij = dist(xx, &y[jj][0]);
            score[ii + 1][jj + 1] = 1.0 / (1 + dij / d02)
                + (secx[ii] == secy[jj] ? 0.5 : 0.0);
        }
    }
}
//get initial alignment from secondary structure and previous alignments
//input: x, y, xlen, ylen
//output: y2x stores the best alignment: e.g.,
//y2x[j]=i means:
//the jth element in y is aligned to the ith element in x if i>=0
//the jth element in y is aligned to a gap in x if i==-1
void get_initial_ssplus(double **r1, double **r2, double **score, bool **path,
    double **val, const char *secx, const char *secy, double **x, double **y,
    int xlen, int ylen, int *y2x0, int *y2x, const double D0_MIN, double d0)
{
    /* Refine a previous alignment y2x0: rebuild the DP score matrix from
     * superposition distance plus secondary structure, then realign with
     * NW dynamic programming into y2x. */
    score_matrix_rmsd_sec(r1, r2, score, secx, secy, x, y, xlen, ylen,
        y2x0, D0_MIN, d0);
    NWDP_TM(score, path, val, xlen, ylen, -1.0, y2x);
}
void find_max_frag(double **x, int len, int *start_max,
    int *end_max, double dcu0, const bool fast_opt)
{
    /* Find the longest run of consecutive residues whose neighbor
     * distance stays below a cutoff, writing its bounds to
     * [*start_max, *end_max].  If no run reaches r_min residues, the
     * cutoff is relaxed by 10% per round and the scan repeats. */
    int fra_min = fast_opt ? 8 : 4;       /* minimum fragment for search */
    int r_min = (int)(len * 1.0 / 3.0);   /* guard for very small proteins */
    if (r_min > fra_min) r_min = fra_min;

    const double dcu0_cut = dcu0 * dcu0;
    double dcu_cut = dcu0_cut;            /* squared neighbor-distance cutoff */
    int inc = 0;
    int Lfr_max = 0;

    while (Lfr_max < r_min)
    {
        Lfr_max = 0;
        int run = 1;     /* residues in the current fragment */
        int start = 0;   /* first residue of the current fragment */
        for (int i = 1; i < len; i++)
        {
            if (dist(x[i - 1], x[i]) < dcu_cut)
            {
                run++;
                if (i == len - 1)   /* chain ends inside a fragment */
                {
                    if (run > Lfr_max)
                    {
                        Lfr_max = run;
                        *start_max = start;
                        *end_max = i;
                    }
                    run = 1;
                }
            }
            else                    /* chain break: close the fragment */
            {
                if (run > Lfr_max)
                {
                    Lfr_max = run;
                    *start_max = start;
                    *end_max = i - 1;
                }
                run = 1;
                start = i;
            }
        }
        if (Lfr_max < r_min)        /* relax the cutoff and rescan */
        {
            inc++;
            const double dinc = pow(1.1, (double)inc) * dcu0;
            dcu_cut = dinc * dinc;
        }
    }
}
//perform fragment gapless threading to find the best initial alignment
//input: x, y, xlen, ylen
//output: y2x0 stores the best alignment: e.g.,
//y2x0[j]=i means:
//the jth element in y is aligned to the ith element in x if i>=0
//the jth element in y is aligned to a gap in x if i==-1
double get_initial_fgt(double **r1, double **r2, double **xtm, double **ytm,
    double **x, double **y, int xlen, int ylen,
    int *y2x, double d0, double d0_search,
    double dcu0, const bool fast_opt, double t[3], double u[3][3])
{
    int fra_min=4;           //minimum fragment for search
    if (fast_opt) fra_min=8;
    int fra_min1=fra_min-1;  //cutoff for shift, save time

    //longest well-connected fragment of each structure
    int xstart=0, ystart=0, xend=0, yend=0;
    find_max_frag(x, xlen, &xstart, &xend, dcu0, fast_opt);
    //fix: second output argument was garbled ("&yend" mangled), which
    //left yend unset and did not compile; pass the address of yend.
    find_max_frag(y, ylen, &ystart, &yend, dcu0, fast_opt);

    int Lx = xend-xstart+1;
    int Ly = yend-ystart+1;
    int *ifr, *y2x_;
    int L_fr=getmin(Lx, Ly);
    ifr= new int[L_fr];
    y2x_= new int[ylen+1];

    //select what piece will be used. The original implement may cause
    //asymetry, but only when xlen==ylen and Lx==Ly
    //if L1=Lfr1 and L2=Lfr2 (normal proteins), it will be the same as initial1
    if(Lx<Ly || (Lx==Ly && xlen<ylen))
    {
        for(int i=0; i<L_fr; i++) ifr[i]=xstart+i;
    }
    else if(Lx>Ly || (Lx==Ly && xlen>ylen))
    {
        for(int i=0; i<L_fr; i++) ifr[i]=ystart+i;
    }
    else // solve asymetric for 1x5gA vs 2q7nA5
    {
        /* In this case, L0==xlen==ylen; L_fr==Lx==Ly */
        int L0=xlen;
        double tmscore, tmscore_max=-1;
        int i, j, k;
        int n1, n2;
        int min_len;
        int min_ali;

        /* part 1, normalized by xlen */
        for(i=0; i<L_fr; i++) ifr[i]=xstart+i;

        //if the fragment spans the whole chain, trim to the central 80%
        if(L_fr==L0)
        {
            n1= (int)(L0*0.1); //my index starts from 0
            n2= (int)(L0*0.89);
            j=0;
            for(i=n1; i<= n2; i++)
            {
                ifr[j]=ifr[i];
                j++;
            }
            L_fr=j;
        }

        int L1=L_fr;
        min_len=getmin(L1, ylen);
        min_ali= (int) (min_len/2.5); //minimum size of considered fragment
        if(min_ali<=fra_min1) min_ali=fra_min1;
        n1 = -ylen+min_ali;
        n2 = L1-min_ali;

        //gapless threading of the x-fragment against y
        for(k=n1; k<=n2; k+=(fast_opt)?3:1)
        {
            //get the map
            for(j=0; j<ylen; j++)
            {
                i=j+k;
                if(i>=0 && i<L1) y2x_[j]=ifr[i];
                else             y2x_[j]=-1;
            }

            //evaluate the map quickly in three iterations
            tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_,
                d0, d0_search, t, u);

            if(tmscore>=tmscore_max)
            {
                tmscore_max=tmscore;
                for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
            }
        }

        /* part 2, normalized by ylen */
        L_fr=Ly;
        for(i=0; i<L_fr; i++) ifr[i]=ystart+i;

        if (L_fr==L0)
        {
            n1= (int)(L0*0.1); //my index starts from 0
            n2= (int)(L0*0.89);
            j=0;
            for(i=n1; i<= n2; i++)
            {
                ifr[j]=ifr[i];
                j++;
            }
            L_fr=j;
        }

        int L2=L_fr;
        min_len=getmin(xlen, L2);
        min_ali= (int) (min_len/2.5); //minimum size of considered fragment
        if(min_ali<=fra_min1) min_ali=fra_min1;
        n1 = -L2+min_ali;
        n2 = xlen-min_ali;

        //gapless threading of the y-fragment against x
        for(k=n1; k<=n2; k++)
        {
            //get the map
            for(j=0; j<ylen; j++) y2x_[j]=-1;
            for(j=0; j<L2; j++)
            {
                i=j+k;
                if(i>=0 && i<xlen) y2x_[ifr[j]]=i;
            }

            //evaluate the map quickly in three iterations
            tmscore=get_score_fast(r1, r2, xtm, ytm,
                x, y, xlen, ylen, y2x_, d0,d0_search, t, u);
            if(tmscore>=tmscore_max)
            {
                tmscore_max=tmscore;
                for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
            }
        }

        delete [] ifr;
        delete [] y2x_;
        return tmscore_max;
    }

    int L0=getmin(xlen, ylen); //non-redundant to get_initial1
    //if the fragment spans the whole shorter chain, trim to the central 80%
    if(L_fr==L0)
    {
        int n1= (int)(L0*0.1); //my index starts from 0
        int n2= (int)(L0*0.89);

        int j=0;
        for(int i=n1; i<= n2; i++)
        {
            ifr[j]=ifr[i];
            j++;
        }
        L_fr=j;
    }

    //gapless threading for the extracted fragment
    double tmscore, tmscore_max=-1;

    if(Lx<Ly || (Lx==Ly && xlen<=ylen))
    {
        //fragment comes from x: slide it along y
        int L1=L_fr;
        int min_len=getmin(L1, ylen);
        int min_ali= (int) (min_len/2.5);  //minimum size of considered fragment
        if(min_ali<=fra_min1)  min_ali=fra_min1;
        int n1, n2;
        n1 = -ylen+min_ali;
        n2 = L1-min_ali;

        int i, j, k;
        for(k=n1; k<=n2; k+=(fast_opt)?3:1)
        {
            //get the map
            for(j=0; j<ylen; j++)
            {
                i=j+k;
                if(i>=0 && i<L1) y2x_[j]=ifr[i];
                else             y2x_[j]=-1;
            }

            //evaluate the map quickly in three iterations
            tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_,
                d0, d0_search, t, u);
            if(tmscore>=tmscore_max)
            {
                tmscore_max=tmscore;
                for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
            }
        }
    }
    else
    {
        //fragment comes from y: slide x along it
        int L2=L_fr;
        int min_len=getmin(xlen, L2);
        int min_ali= (int) (min_len/2.5);  //minimum size of considered fragment
        if(min_ali<=fra_min1)  min_ali=fra_min1;
        int n1, n2;
        n1 = -L2+min_ali;
        n2 = xlen-min_ali;

        int i, j, k;
        for(k=n1; k<=n2; k++)
        {
            //get the map
            for(j=0; j<ylen; j++) y2x_[j]=-1;
            for(j=0; j<L2; j++)
            {
                i=j+k;
                if(i>=0 && i<xlen) y2x_[ifr[j]]=i;
            }

            //evaluate the map quickly in three iterations
            tmscore=get_score_fast(r1, r2, xtm, ytm,
                x, y, xlen, ylen, y2x_, d0,d0_search, t, u);
            if(tmscore>=tmscore_max)
            {
                tmscore_max=tmscore;
                for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
            }
        }
    }

    delete [] ifr;
    delete [] y2x_;
    return tmscore_max;
}
//heuristic run of dynamic programing iteratively to find the best alignment
//input: initial rotation matrix t, u
// vectors x and y, d0
//output: best alignment that maximizes the TMscore, will be stored in invmap
/* Heuristic iterative dynamic programming: starting from the rotation
 * (t,u), alternately realign with distance-based NW DP and re-superpose
 * with TMscore8_search, for each gap-opening penalty gap_open[g1..g2-1]
 * and up to iteration_max rounds (stopping early on score convergence).
 * The best alignment found is written to invmap0 and the best TM-score
 * is returned.  D0_MIN is accepted for interface compatibility but not
 * referenced in this body. */
double DP_iter(double **r1, double **r2, double **xtm, double **ytm,
    double **xt, bool **path, double **val, double **x, double **y,
    int xlen, int ylen, double t[3], double u[3][3], int invmap0[],
    int g1, int g2, int iteration_max, double local_d0_search,
    double D0_MIN, double Lnorm, double d0, double score_d8)
{
    double gap_open[2]={-0.6, 0};   //candidate gap-opening penalties
    double rmsd;
    int *invmap=new int[ylen+1];

    int iteration, i, j, k;
    double tmscore, tmscore_max, tmscore_old=0;
    int score_sum_method=8, simplify_step=40;
    tmscore_max=-1;

    //double d01=d0+1.5;
    double d02=d0*d0;
    for(int g=g1; g<g2; g++)   //for each gap-opening penalty
    {
        for(iteration=0; iteration<iteration_max; iteration++)
        {
            //realign with distance-based NW DP under the current (t,u)
            NWDP_TM(path, val, x, y, xlen, ylen,
                t, u, d02, gap_open[g], invmap);

            //pack the aligned pairs for the TM-score search
            k=0;
            for(j=0; j<ylen; j++)
            {
                i=invmap[j];

                if(i>=0) //aligned
                {
                    xtm[k][0]=x[i][0];
                    xtm[k][1]=x[i][1];
                    xtm[k][2]=x[i][2];

                    ytm[k][0]=y[j][0];
                    ytm[k][1]=y[j][1];
                    ytm[k][2]=y[j][2];
                    k++;
                }
            }

            //re-superpose and score; updates (t,u) for the next round
            tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u,
                simplify_step, score_sum_method, &rmsd, local_d0_search,
                Lnorm, score_d8, d0);

            if(tmscore>tmscore_max)
            {
                tmscore_max=tmscore;
                for(i=0; i<ylen; i++) invmap0[i]=invmap[i];
            }

            if(iteration>0)   //converged: score no longer changes
            {
                if(fabs(tmscore_old-tmscore)<0.000001) break;
            }
            tmscore_old=tmscore;
        }// for iteration
    }//for gapopen

    delete []invmap;
    return tmscore_max;
}
void output_superpose(const string xname, const string yname,
const string fname_super,
double t[3], double u[3][3], const int ter_opt, const int mirror_opt,
const char *seqM, const char *seqxA, const char *seqyA,
const vector<string>&resi_vec1, const vector<string>&resi_vec2,
const char *chainID1, const char *chainID2,
const int xlen, const int ylen, const double d0A, const int n_ali8,
const double rmsd, const double TM1, const double Liden)
{
stringstream buf;
stringstream buf_all;
stringstream buf_atm;
stringstream buf_all_atm;
stringstream buf_all_atm_lig;
stringstream buf_pdb;
stringstream buf_pymol;
stringstream buf_tm;
string line;
double x[3]; // before transform
double x1[3]; // after transform
bool after_ter; // true if passed the "TER" line in PDB
string asym_id; // chain ID
buf_tm<<"REMARK TM-align"
<<"\nREMARK Chain 1:"<<setw(11)<<left<<xname+chainID1<<" Size= "<<xlen
<<"\nREMARK Chain 2:"<<setw(11)<<yname+chainID2<<right<<" Size= "<<ylen
<<" (TM-score is normalized by "<<setw(4)<<ylen<<", d0="
<<setiosflags(ios::fixed)<<setprecision(2)<<setw(6)<<d0A<<")"
<<"\nREMARK Aligned length="<<setw(4)<<n_ali8<<", RMSD="
<<setw(6)<<setiosflags(ios::fixed)<<setprecision(2)<<rmsd
<<", TM-score="<<setw(7)<<setiosflags(ios::fixed)<<setprecision(5)<<TM1
<<", ID="<<setw(5)<<setiosflags(ios::fixed)<<setprecision(3)
<<((n_ali8>0)?Liden/n_ali8:0)<<endl;
string rasmol_CA_header="load inline\nselect *A\nwireframe .45\nselect *B\nwireframe .20\nselect all\ncolor white\n";
string rasmol_cartoon_header="load inline\nselect all\ncartoon\nselect *A\ncolor blue\nselect *B\ncolor red\nselect ligand\nwireframe 0.25\nselect solvent\nspacefill 0.25\nselect all\nexit\n"+buf_tm.str();
buf<<rasmol_CA_header;
buf_all<<rasmol_CA_header;
buf_atm<<rasmol_cartoon_header;
buf_all_atm<<rasmol_cartoon_header;
buf_all_atm_lig<<rasmol_cartoon_header;
/* for PDBx/mmCIF only */
map<string,int> _atom_site;
int atom_site_pos;
vector<string> line_vec;
string atom; // 4-character atom name
string AA; // 3-character residue name
string resi; // 4-character residue sequence number
string inscode; // 1-character insertion code
string model_index; // model index
bool is_mmcif=false;
int chain_num=0;
/* used for CONECT record of chain1 */
int ca_idx1=0; // all CA atoms
int lig_idx1=0; // all atoms
vector <int> idx_vec;
/* used for CONECT record of chain2 */
int ca_idx2=0; // all CA atoms
int lig_idx2=0; // all atoms
/* extract aligned region */
vector<string> resi_aln1;
vector<string> resi_aln2;
int i1=-1;
int i2=-1;
int i;
for (i=0;i<strlen(seqM);i++)
{
i1+=(seqxA[i]!='-');
i2+=(seqyA[i]!='-');
if (seqM[i]==' ') continue;
resi_aln1.push_back(resi_vec1[i1].substr(0,4));
resi_aln2.push_back(resi_vec2[i2].substr(0,4));
if (seqM[i]!=':') continue;
buf <<"select "<<resi_aln1.back()<<":A,"
<<resi_aln2.back()<<":B\ncolor red\n";
buf_all<<"select "<<resi_aln1.back()<<":A,"
<<resi_aln2.back()<<":B\ncolor red\n";
}
buf<<"select all\nexit\n"<<buf_tm.str();
buf_all<<"select all\nexit\n"<<buf_tm.str();
ifstream fin;
/* read first file */
after_ter=false;
asym_id="";
fin.open(xname.c_str());
while (fin.good())
{
getline(fin, line);
if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true;
if (is_mmcif==false && line.size()>=54 &&
(line.compare(0, 6, "ATOM ")==0 ||
line.compare(0, 6, "HETATM")==0)) // PDB format
{
x[0]=atof(line.substr(30,8).c_str());
x[1]=atof(line.substr(38,8).c_str());
x[2]=atof(line.substr(46,8).c_str());
if (mirror_opt) x[2]=-x[2];
transform(t, u, x, x1);
buf_pdb<<line.substr(0,30)<<setiosflags(ios::fixed)
<<setprecision(3)
<<setw(8)<<x1[0] <<setw(8)<<x1[1] <<setw(8)<<x1[2]
<<line.substr(54)<<'\n';
if (line[16]!='A' && line[16]!=' ') continue;
if (after_ter && line.compare(0,6,"ATOM ")==0) continue;
lig_idx1++;
buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1
<<line.substr(11,9)<<" A"<<line.substr(22,8)
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
if (after_ter || line.compare(0,6,"ATOM ")) continue;
if (ter_opt>=2)
{
if (ca_idx1 && asym_id.size() && asym_id!=line.substr(21,1))
{
after_ter=true;
continue;
}
asym_id=line[21];
}
buf_all_atm<<"ATOM "<<setw(5)<<lig_idx1
<<line.substr(11,9)<<" A"<<line.substr(22,8)
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
if (find(resi_aln1.begin(),resi_aln1.end(),line.substr(22,4)
)!=resi_aln1.end())
{
buf_atm<<"ATOM "<<setw(5)<<lig_idx1
<<line.substr(11,9)<<" A"<<line.substr(22,8)
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
}
if (line.substr(12,4)!=" CA ") continue;
ca_idx1++;
buf_all<<"ATOM "<<setw(5)<<ca_idx1
<<" CA "<<line.substr(17,3)<<" A"<<line.substr(22,8)
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n';
if (find(resi_aln1.begin(),resi_aln1.end(),line.substr(22,4)
)==resi_aln1.end()) continue;
buf<<"ATOM "<<setw(5)<<ca_idx1
<<" CA "<<line.substr(17,3)<<" A"<<line.substr(22,8)
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n';
idx_vec.push_back(ca_idx1);
}
else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
{
while(1)
{
if (fin.good()) getline(fin, line);
else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
if (line.size()) break;
}
if (line.compare(0,11,"_atom_site.")) continue;
_atom_site.clear();
atom_site_pos=0;
_atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
while(1)
{
if (fin.good()) getline(fin, line);
else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
if (line.size()==0) continue;
if (line.compare(0,11,"_atom_site.")) break;
_atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
}
if (is_mmcif==false)
{
buf_pdb.str(string());
is_mmcif=true;
}
while(1)
{
line_vec.clear();
split(line,line_vec);
if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
line_vec[_atom_site["group_PDB"]]!="HETATM") break;
if (_atom_site.count("pdbx_PDB_model_num"))
{
if (model_index.size() && model_index!=
line_vec[_atom_site["pdbx_PDB_model_num"]])
break;
model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
}
x[0]=atof(line_vec[_atom_site["Cartn_x"]].c_str());
x[1]=atof(line_vec[_atom_site["Cartn_y"]].c_str());
x[2]=atof(line_vec[_atom_site["Cartn_z"]].c_str());
if (mirror_opt) x[2]=-x[2];
transform(t, u, x, x1);
if (_atom_site.count("label_alt_id")==0 ||
line_vec[_atom_site["label_alt_id"]]=="." ||
line_vec[_atom_site["label_alt_id"]]=="A")
{
atom=line_vec[_atom_site["label_atom_id"]];
if (atom[0]=='"') atom=atom.substr(1);
if (atom.size() && atom[atom.size()-1]=='"')
atom=atom.substr(0,atom.size()-1);
if (atom.size()==0) atom=" ";
else if (atom.size()==1) atom=" "+atom+" ";
else if (atom.size()==2) atom=" "+atom+" ";
else if (atom.size()==3) atom=" "+atom;
else if (atom.size()>=5) atom=atom.substr(0,4);
AA=line_vec[_atom_site["label_comp_id"]]; // residue name
if (AA.size()==1) AA=" "+AA;
else if (AA.size()==2) AA=" " +AA;
else if (AA.size()>=4) AA=AA.substr(0,3);
if (_atom_site.count("auth_seq_id"))
resi=line_vec[_atom_site["auth_seq_id"]];
else resi=line_vec[_atom_site["label_seq_id"]];
while (resi.size()<4) resi=' '+resi;
if (resi.size()>4) resi=resi.substr(0,4);
inscode=' ';
if (_atom_site.count("pdbx_PDB_ins_code") &&
line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
if (_atom_site.count("auth_asym_id"))
{
if (ter_opt>=2 && ca_idx1 && asym_id.size() &&
asym_id!=line_vec[_atom_site["auth_asym_id"]])
after_ter=true;
asym_id=line_vec[_atom_site["auth_asym_id"]];
}
else if (_atom_site.count("label_asym_id"))
{
if (ter_opt>=2 && ca_idx1 && asym_id.size() &&
asym_id!=line_vec[_atom_site["label_asym_id"]])
after_ter=true;
asym_id=line_vec[_atom_site["label_asym_id"]];
}
buf_pdb<<left<<setw(6)
<<line_vec[_atom_site["group_PDB"]]<<right
<<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
<<AA<<" "<<asym_id[asym_id.size()-1]
<<resi<<inscode<<" "
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]
<<setw(8)<<x1[1]
<<setw(8)<<x1[2]<<'\n';
if (after_ter==false ||
line_vec[_atom_site["group_pdb"]]=="HETATM")
{
lig_idx1++;
buf_all_atm_lig<<left<<setw(6)
<<line_vec[_atom_site["group_PDB"]]<<right
<<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
<<AA<<" A"<<resi<<inscode<<" "
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]
<<setw(8)<<x1[1]
<<setw(8)<<x1[2]<<'\n';
if (after_ter==false &&
line_vec[_atom_site["group_PDB"]]=="ATOM")
{
buf_all_atm<<"ATOM "<<setw(6)
<<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
<<AA<<" A"<<resi<<inscode<<" "
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]
<<setw(8)<<x1[1]
<<setw(8)<<x1[2]<<'\n';
if (find(resi_aln1.begin(),resi_aln1.end(),resi
)!=resi_aln1.end())
{
buf_atm<<"ATOM "<<setw(6)
<<setw(5)<<lig_idx1%100000<<' '
<<atom<<' '<<AA<<" A"<<resi<<inscode<<" "
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]
<<setw(8)<<x1[1]
<<setw(8)<<x1[2]<<'\n';
}
if (atom==" CA ")
{
ca_idx1++;
buf_all<<"ATOM "<<setw(6)
<<setw(5)<<ca_idx1%100000<<" CA "
<<AA<<" A"<<resi<<inscode<<" "
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]
<<setw(8)<<x1[1]
<<setw(8)<<x1[2]<<'\n';
if (find(resi_aln1.begin(),resi_aln1.end(),resi
)!=resi_aln1.end())
{
buf<<"ATOM "<<setw(6)
<<setw(5)<<ca_idx1%100000<<" CA "
<<AA<<" A"<<resi<<inscode<<" "
<<setiosflags(ios::fixed)<<setprecision(3)
<<setw(8)<<x1[0]
<<setw(8)<<x1[1]
<<setw(8)<<x1[2]<<'\n';
idx_vec.push_back(ca_idx1);
}
}
}
}
}
while(1)
{
if (fin.good()) getline(fin, line);
else break;
if (line.size()) break;
}
}
}
else if (line.size() && is_mmcif==false)
{
buf_pdb<<line<<'\n';
if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
}
}
fin.close();
buf<<"TER\n";
buf_all<<"TER\n";
buf_atm<<"TER\n";
buf_all_atm<<"TER\n";
buf_all_atm_lig<<"TER\n";
for (i=1;i<ca_idx1;i++) buf_all<<"CONECT"
<<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n';
for (i=1;i<idx_vec.size();i++) buf<<"CONECT"
<<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n';
idx_vec.clear();
/* read second file */
after_ter=false;
asym_id="";
fin.open(yname.c_str());
while (fin.good())
{
getline(fin, line);
if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true;
if (line.size()>=54 && (line.compare(0, 6, "ATOM ")==0 ||
line.compare(0, 6, "HETATM")==0)) // PDB format
{
if (line[16]!='A' && line[16]!=' ') continue;
if (after_ter && line.compare(0,6,"ATOM ")==0) continue;
lig_idx2++;
buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1+lig_idx2
<<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
if (after_ter || line.compare(0,6,"ATOM ")) continue;
if (ter_opt>=2)
{
if (ca_idx2 && asym_id.size() && asym_id!=line.substr(21,1))
{
after_ter=true;
continue;
}
asym_id=line[21];
}
buf_all_atm<<"ATOM "<<setw(5)<<lig_idx1+lig_idx2
<<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
if (find(resi_aln2.begin(),resi_aln2.end(),line.substr(22,4)
)!=resi_aln2.end())
{
buf_atm<<"ATOM "<<setw(5)<<lig_idx1+lig_idx2
<<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
}
if (line.substr(12,4)!=" CA ") continue;
ca_idx2++;
buf_all<<"ATOM "<<setw(5)<<ca_idx1+ca_idx2<<" CA "
<<line.substr(17,3)<<" B"<<line.substr(22,32)<<'\n';
if (find(resi_aln2.begin(),resi_aln2.end(),line.substr(22,4)
)==resi_aln2.end()) continue;
buf<<"ATOM "<<setw(5)<<ca_idx1+ca_idx2<<" CA "
<<line.substr(17,3)<<" B"<<line.substr(22,32)<<'\n';
idx_vec.push_back(ca_idx1+ca_idx2);
}
else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
{
while(1)
{
if (fin.good()) getline(fin, line);
else PrintErrorAndQuit("ERROR! Unexpected end of "+yname);
if (line.size()) break;
}
if (line.compare(0,11,"_atom_site.")) continue;
_atom_site.clear();
atom_site_pos=0;
_atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
while(1)
{
if (fin.good()) getline(fin, line);
else PrintErrorAndQuit("ERROR! Unexpected end of "+yname);
if (line.size()==0) continue;
if (line.compare(0,11,"_atom_site.")) break;
_atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
}
while(1)
{
line_vec.clear();
split(line,line_vec);
if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
line_vec[_atom_site["group_PDB"]]!="HETATM") break;
if (_atom_site.count("pdbx_PDB_model_num"))
{
if (model_index.size() && model_index!=
line_vec[_atom_site["pdbx_PDB_model_num"]])
break;
model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
}
if (_atom_site.count("label_alt_id")==0 ||
line_vec[_atom_site["label_alt_id"]]=="." ||
line_vec[_atom_site["label_alt_id"]]=="A")
{
atom=line_vec[_atom_site["label_atom_id"]];
if (atom[0]=='"') atom=atom.substr(1);
if (atom.size() && atom[atom.size()-1]=='"')
atom=atom.substr(0,atom.size()-1);
if (atom.size()==0) atom=" ";
else if (atom.size()==1) atom=" "+atom+" ";
else if (atom.size()==2) atom=" "+atom+" ";
else if (atom.size()==3) atom=" "+atom;
else if (atom.size()>=5) atom=atom.substr(0,4);
AA=line_vec[_atom_site["label_comp_id"]]; // residue name
if (AA.size()==1) AA=" "+AA;
else if (AA.size()==2) AA=" " +AA;
else if (AA.size()>=4) AA=AA.substr(0,3);
if (_atom_site.count("auth_seq_id"))
resi=line_vec[_atom_site["auth_seq_id"]];
else resi=line_vec[_atom_site["label_seq_id"]];
while (resi.size()<4) resi=' '+resi;
if (resi.size()>4) resi=resi.substr(0,4);
inscode=' ';
if (_atom_site.count("pdbx_PDB_ins_code") &&
line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
if (ter_opt>=2)
{
if (_atom_site.count("auth_asym_id"))
{
if (ca_idx2 && asym_id.size() &&
asym_id!=line_vec[_atom_site["auth_asym_id"]])
after_ter=true;
else
asym_id=line_vec[_atom_site["auth_asym_id"]];
}
else if (_atom_site.count("label_asym_id"))
{
if (ca_idx2 && asym_id.size() &&
asym_id!=line_vec[_atom_site["label_asym_id"]])
after_ter=true;
else
asym_id=line_vec[_atom_site["label_asym_id"]];
}
}
if (after_ter==false ||
line_vec[_atom_site["group_PDB"]]=="HETATM")
{
lig_idx2++;
buf_all_atm_lig<<left<<setw(6)
<<line_vec[_atom_site["group_PDB"]]<<right
<<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
<<atom<<' '<<AA<<" B"<<resi<<inscode<<" "
<<setw(8)<<line_vec[_atom_site["Cartn_x"]]
<<setw(8)<<line_vec[_atom_site["Cartn_y"]]
<<setw(8)<<line_vec[_atom_site["Cartn_z"]]
<<'\n';
if (after_ter==false &&
line_vec[_atom_site["group_PDB"]]=="ATOM")
{
buf_all_atm<<"ATOM "<<setw(6)
<<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
<<atom<<' '<<AA<<" B"<<resi<<inscode<<" "
<<setw(8)<<line_vec[_atom_site["Cartn_x"]]
<<setw(8)<<line_vec[_atom_site["Cartn_y"]]
<<setw(8)<<line_vec[_atom_site["Cartn_z"]]
<<'\n';
if (find(resi_aln2.begin(),resi_aln2.end(),resi
)!=resi_aln2.end())
{
buf_atm<<"ATOM "<<setw(6)
<<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
<<atom<<' '<<AA<<" B"<<resi<<inscode<<" "
<<setw(8)<<line_vec[_atom_site["Cartn_x"]]
<<setw(8)<<line_vec[_atom_site["Cartn_y"]]
<<setw(8)<<line_vec[_atom_site["Cartn_z"]]
<<'\n';
}
if (atom==" CA ")
{
ca_idx2++;
buf_all<<"ATOM "<<setw(6)
<<setw(5)<<(ca_idx1+ca_idx2)%100000
<<" CA "<<AA<<" B"<<resi<<inscode<<" "
<<setw(8)<<line_vec[_atom_site["Cartn_x"]]
<<setw(8)<<line_vec[_atom_site["Cartn_y"]]
<<setw(8)<<line_vec[_atom_site["Cartn_z"]]
<<'\n';
if (find(resi_aln2.begin(),resi_aln2.end(),resi
)!=resi_aln2.end())
{
buf<<"ATOM "<<setw(6)
<<setw(5)<<(ca_idx1+ca_idx2)%100000
<<" CA "<<AA<<" B"<<resi<<inscode<<" "
<<setw(8)<<line_vec[_atom_site["Cartn_x"]]
<<setw(8)<<line_vec[_atom_site["Cartn_y"]]
<<setw(8)<<line_vec[_atom_site["Cartn_z"]]
<<'\n';
idx_vec.push_back(ca_idx1+ca_idx2);
}
}
}
}
}
if (fin.good()) getline(fin, line);
else break;
}
}
else if (line.size())
{
if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
}
}
fin.close();
buf<<"TER\n";
buf_all<<"TER\n";
buf_atm<<"TER\n";
buf_all_atm<<"TER\n";
buf_all_atm_lig<<"TER\n";
for (i=ca_idx1+1;i<ca_idx1+ca_idx2;i++) buf_all<<"CONECT"
<<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n';
for (i=1;i<idx_vec.size();i++) buf<<"CONECT"
<<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n';
idx_vec.clear();
/* write pymol script */
ofstream fp;
vector<string> pml_list;
pml_list.push_back(fname_super+"");
pml_list.push_back(fname_super+"_atm");
pml_list.push_back(fname_super+"_all");
pml_list.push_back(fname_super+"_all_atm");
pml_list.push_back(fname_super+"_all_atm_lig");
for (i=0;i<pml_list.size();i++)
{
buf_pymol<<"#!/usr/bin/env pymol\n"
<<"load "<<pml_list[i]<<", format=pdb\n"
<<"hide all\n"
<<((i==0 || i==2)?("show stick\n"):("show cartoon\n"))
<<"color blue, chain A\n"
<<"color red, chain B\n"
<<"set ray_shadow, 0\n"
<<"set stick_radius, 0.3\n"
<<"set sphere_scale, 0.25\n"
<<"show stick, not polymer\n"
<<"show sphere, not polymer\n"
<<"bg_color white\n"
<<"set transparency=0.2\n"
<<"zoom polymer\n"
<<endl;
fp.open((pml_list[i]+".pml").c_str());
fp<<buf_pymol.str();
fp.close();
buf_pymol.str(string());
pml_list[i].clear();
}
pml_list.clear();
/* write rasmol script */
fp.open((fname_super).c_str());
fp<<buf.str();
fp.close();
fp.open((fname_super+"_all").c_str());
fp<<buf_all.str();
fp.close();
fp.open((fname_super+"_atm").c_str());
fp<<buf_atm.str();
fp.close();
fp.open((fname_super+"_all_atm").c_str());
fp<<buf_all_atm.str();
fp.close();
fp.open((fname_super+"_all_atm_lig").c_str());
fp<<buf_all_atm_lig.str();
fp.close();
fp.open((fname_super+".pdb").c_str());
fp<<buf_pdb.str();
fp.close();
/* clear stream */
buf.str(string());
buf_all.str(string());
buf_atm.str(string());
buf_all_atm.str(string());
buf_all_atm_lig.str(string());
buf_pdb.str(string());
buf_tm.str(string());
resi_aln1.clear();
resi_aln2.clear();
asym_id.clear();
line_vec.clear();
atom.clear();
AA.clear();
resi.clear();
inscode.clear();
model_index.clear();
}
/* extract rotation matrix based on TMscore8 */
/* Write the rigid-body transform (translation t, rotation u) that maps
 * Chain_1 onto Chain_2 to the text file fname_matrix, together with a
 * ready-to-paste C snippet showing how to apply it.  Truncates any
 * existing file; on open failure a diagnostic is printed to stdout. */
void output_rotation_matrix(const char* fname_matrix,
    const double t[3], const double u[3][3])
{
    fstream fout;
    fout.open(fname_matrix, ios::out | ios::trunc);
    if (fout)// succeed
    {
        fout << "------ The rotation matrix to rotate Chain_1 to Chain_2 ------\n";
        char dest[1000];
        // snprintf bounds the write to the buffer (sprintf had no such
        // guard); output is identical for these fixed-width formats.
        snprintf(dest, sizeof(dest), "m %18s %14s %14s %14s\n",
            "t[m]", "u[m][0]", "u[m][1]", "u[m][2]");
        fout << string(dest);
        for (int k = 0; k < 3; k++)
        {
            snprintf(dest, sizeof(dest), "%d %18.10f %14.10f %14.10f %14.10f\n",
                k, t[k], u[k][0], u[k][1], u[k][2]);
            fout << string(dest);
        }
        fout << "\nCode for rotating Structure A from (x,y,z) to (X,Y,Z):\n"
            "for(i=0; i<L; i++)\n"
            "{\n"
            " X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i];\n"
            " Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i];\n"
            " Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i];\n"
            "}\n";
        fout.close();
    }
    else
        cout << "Open file to output rotation matrix fail.\n";
}
//output the final results
/* Print the alignment summary for one structure pair in the format
 * selected by outfmt_opt, then optionally write the rotation matrix
 * (-m) and the superposed structure files (-o).
 *   outfmt_opt <= 0 : full human-readable report with the alignment
 *   outfmt_opt == 1 : FASTA-like two-record alignment output
 *   outfmt_opt == 2 : one tab-separated summary line
 * TM1 is normalized by ylen (Chain_2), TM2 by xlen (Chain_1); TM3/TM4/TM5
 * are the -a / -u / -d variants, printed only when the matching flag is
 * set.  Output format strings must stay byte-identical: downstream
 * scripts parse them. */
void output_results(
    const string xname, const string yname,
    const char *chainID1, const char *chainID2,
    const int xlen, const int ylen, double t[3], double u[3][3],
    const double TM1, const double TM2,
    const double TM3, const double TM4, const double TM5,
    const double rmsd, const double d0_out,
    const char *seqM, const char *seqxA, const char *seqyA, const double Liden,
    const int n_ali8, const int L_ali,
    const double TM_ali, const double rmsd_ali, const double TM_0,
    const double d0_0, const double d0A, const double d0B,
    const double Lnorm_ass, const double d0_scale,
    const double d0a, const double d0u, const char* fname_matrix,
    const int outfmt_opt, const int ter_opt, const string fname_super,
    const int i_opt, const int a_opt, const bool u_opt, const bool d_opt,
    const int mirror_opt,
    const vector<string>&resi_vec1, const vector<string>&resi_vec2)
{
    if (outfmt_opt<=0)  // classic full report
    {
        printf("\nName of Chain_1: %s%s (to be superimposed onto Chain_2)\n",
            xname.c_str(), chainID1);
        printf("Name of Chain_2: %s%s\n", yname.c_str(), chainID2);
        printf("Length of Chain_1: %d residues\n", xlen);
        printf("Length of Chain_2: %d residues\n\n", ylen);
        if (i_opt)  // user supplied an initial alignment (-i/-I)
            printf("User-specified initial alignment: TM/Lali/rmsd = %7.5lf, %4d, %6.3lf\n", TM_ali, L_ali, rmsd_ali);
        // guard against n_ali8==0 to avoid division by zero
        printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
        printf("TM-score= %6.5f (if normalized by length of Chain_1, i.e., LN=%d, d0=%.2f)\n", TM2, xlen, d0B);
        printf("TM-score= %6.5f (if normalized by length of Chain_2, i.e., LN=%d, d0=%.2f)\n", TM1, ylen, d0A);
        if (a_opt==1)  // -a: normalize by the average length
            printf("TM-score= %6.5f (if normalized by average length of two structures, i.e., LN= %.1f, d0= %.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
        if (u_opt)  // -u: normalize by a user-specified length
            printf("TM-score= %6.5f (if normalized by user-specified LN=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u);
        if (d_opt)  // -d: score scaled by a user-specified d0
            printf("TM-score= %6.5f (if scaled by user-specified d0= %.2f, and LN= %d)\n", TM5, d0_scale, ylen);
        printf("(You should use TM-score normalized by length of the reference structure)\n");
        //output alignment
        printf("\n(\":\" denotes residue pairs of d < %4.1f Angstrom, ", d0_out);
        printf("\".\" denotes other aligned residues)\n");
        printf("%s\n", seqxA);
        printf("%s\n", seqM);
        printf("%s\n", seqyA);
    }
    else if (outfmt_opt==1)  // FASTA-like output, terminated by "$$$$"
    {
        printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
            xname.c_str(), chainID1, xlen, d0B, Liden/xlen, TM2);
        printf("%s\n", seqxA);
        printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
            yname.c_str(), chainID2, ylen, d0A, Liden/ylen, TM1);
        printf("%s\n", seqyA);
        printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n",
            n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
        if (i_opt)
            printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali);
        if(a_opt)
            printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
        if(u_opt)
            printf("# TM-score=%.5f (normalized by user-specified L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u);
        if(d_opt)
            printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen);
        printf("$$$$\n");
    }
    else if (outfmt_opt==2)  // compact tab-separated summary line
    {
        printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d",
            xname.c_str(), chainID1, yname.c_str(), chainID2, TM2, TM1, rmsd,
            Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0,
            xlen, ylen, n_ali8);
    }
    cout << endl;
    if (strlen(fname_matrix))  // -m: dump the rotation matrix
        output_rotation_matrix(fname_matrix, t, u);
    if (fname_super.size())    // -o: write superposed structures/scripts
        output_superpose(xname, yname, fname_super, t, u, ter_opt, mirror_opt,
            seqM, seqxA, seqyA, resi_vec1, resi_vec2, chainID1, chainID2,
            xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden);
}
/* Score a FIXED alignment (invmap, y->x) without further alignment search.
 * Computes d0 for normalization by ylen (stepwise table for RNA, the
 * standard length-dependent formula for protein), superposes the aligned
 * pairs by Kabsch, and returns the TM-score normalized by ylen.  L_ali and
 * RMSD report the number of aligned pairs and their post-superposition
 * RMSD; t/u receive the Kabsch transform. */
double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm,
    double **xt, double **x, double **y, int xlen, int ylen, int invmap[],
    int& L_ali, double& RMSD, double D0_MIN, double Lnorm, double d0,
    double d0_search, double score_d8, double t[3], double u[3][3],
    const int mol_type)
{
    D0_MIN = 0.5;
    Lnorm  = ylen;
    if (mol_type > 0) // RNA: stepwise d0 table for short chains
    {
        if      (Lnorm <= 11) d0 = 0.3;
        else if (Lnorm <= 15) d0 = 0.4;
        else if (Lnorm <= 19) d0 = 0.5;
        else if (Lnorm <= 23) d0 = 0.6;
        else if (Lnorm <  30) d0 = 0.7;
        else d0 = (0.6 * pow((Lnorm * 1.0 - 0.5), 1.0 / 2) - 2.5);
    }
    else // protein
    {
        if (Lnorm > 21) d0 = (1.24 * pow((Lnorm * 1.0 - 15), 1.0 / 3) - 1.8);
        else            d0 = D0_MIN;
        if (d0 < D0_MIN) d0 = D0_MIN;
    }
    double d0_input = d0; // Scaled by seq_min

    /* collect the aligned coordinate pairs listed in invmap */
    int n_al = 0;
    for (int j = 0; j < ylen; j++)
    {
        const int i = invmap[j];
        if (i < 0)
        {
            // -1 marks "unaligned"; anything else negative is corrupt
            if (i != -1) PrintErrorAndQuit("Wrong map!\n");
            continue;
        }
        for (int k = 0; k < 3; k++)
        {
            xtm[n_al][k] = x[i][k];
            ytm[n_al][k] = y[j][k];
            r1[n_al][k]  = x[i][k];
            r2[n_al][k]  = y[j][k];
        }
        n_al++;
    }
    L_ali = n_al;

    /* optimal superposition of the aligned pairs, then their RMSD */
    Kabsch(r1, r2, n_al, 0, &RMSD, t, u);
    RMSD = sqrt(RMSD / (1.0 * n_al));

    /* score the fixed alignment: simplify_step=1 (no simplification),
     * score_sum_method=0 (plain summation) */
    d0_search = d0_input;
    double rms = 0.0;
    double tmscore = TMscore8_search_standard(r1, r2, xtm, ytm, xt, n_al,
        t, u, 1, 0, &rms, d0_input, score_d8, d0);
    return tmscore * n_al / (1.0 * Lnorm);
}
/* copy the value of t and u into t0,u0 */
/* Deep-copy a translation vector (t -> t0) and 3x3 rotation matrix
 * (u -> u0); the sources are left untouched. */
void copy_t_u(double t[3], double u[3][3], double t0[3], double u0[3][3])
{
    for (int r = 0; r < 3; r++)
    {
        t0[r] = t[r];
        for (int c = 0; c < 3; c++) u0[r][c] = u[r][c];
    }
}
/* calculate approximate TM-score given rotation matrix */
/* Apply the transform (t,u) to the aligned x-residues of invmap0 and sum
 * the TM-score terms 1/(1+(d/d0)^2) without any refinement search.
 * Normalization length follows a_opt: -2 -> longer chain, -1 -> shorter
 * chain, 1 -> average of both, otherwise ylen. */
double approx_TM(const int xlen, const int ylen, const int a_opt,
    double **xa, double **ya, double t[3], double u[3][3],
    const int invmap0[], const int mol_type)
{
    double Lnorm_0 = ylen; // default: normalize by the second structure
    if      (a_opt == -2 && xlen > ylen) Lnorm_0 = xlen;           // longer
    else if (a_opt == -1 && xlen < ylen) Lnorm_0 = xlen;           // shorter
    else if (a_opt ==  1)                Lnorm_0 = (xlen + ylen) / 2.; // average

    double D0_MIN, Lnorm, d0, d0_search;
    parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);

    double score_sum = 0;
    double xrot[3] = {0, 0, 0};
    for (int j = 0; j < ylen; j++)
    {
        const int i = invmap0[j];
        if (i < 0) continue; // position j is unaligned
        transform(t, u, &xa[i][0], &xrot[0]);
        const double d = sqrt(dist(&xrot[0], &ya[j][0]));
        score_sum += 1 / (1 + (d / d0) * (d / d0));
    }
    return score_sum / Lnorm_0;
}
/* Free every working buffer allocated by TMalign_main, used when the run
 * pre-terminates after an approximate TM-score.  Row counts must match
 * the corresponding NewArray calls in TMalign_main. */
void clean_up_after_approx_TM(int *invmap0, int *invmap,
    double **score, bool **path, double **val, double **xtm, double **ytm,
    double **xt, double **r1, double **r2, const int xlen, const int minlen)
{
    // alignment maps
    delete [] invmap0;
    delete [] invmap;
    // dynamic-programming tables, (xlen+1) rows each
    DeleteArray(&score, xlen + 1);
    DeleteArray(&path,  xlen + 1);
    DeleteArray(&val,   xlen + 1);
    // coordinate work buffers
    DeleteArray(&xtm, minlen);
    DeleteArray(&ytm, minlen);
    DeleteArray(&xt,  xlen);
    DeleteArray(&r1,  minlen);
    DeleteArray(&r2,  minlen);
}
/* Entry function for TM-align. Return TM-score calculation status:
* 0 - full TM-score calculation
* 1 - terminated due to exception
* 2-7 - pre-terminated due to low TM-score */
int TMalign_main(double **xa, double **ya,
const char *seqx, const char *seqy, const char *secx, const char *secy,
double t0[3], double u0[3][3],
double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
double &d0_0, double &TM_0,
double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
string &seqM, string &seqxA, string &seqyA,
double &rmsd0, int &L_ali, double &Liden,
double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
const int xlen, const int ylen,
const vector<string> sequence, const double Lnorm_ass,
const double d0_scale, const int i_opt, const int a_opt,
const bool u_opt, const bool d_opt, const bool fast_opt,
const int mol_type, const double TMcut=-1)
{
double D0_MIN; //for d0
double Lnorm; //normalization length
double score_d8,d0,d0_search,dcu0;//for TMscore search
double t[3], u[3][3]; //Kabsch translation vector and rotation matrix
double **score; // Input score table for dynamic programming
bool **path; // for dynamic programming
double **val; // for dynamic programming
double **xtm, **ytm; // for TMscore search engine
double **xt; //for saving the superposed version of r_1 or xtm
double **r1, **r2; // for Kabsch rotation
/***********************/
/* allocate memory */
/***********************/
int minlen = min(xlen, ylen);
NewArray(&score, xlen+1, ylen+1);
NewArray(&path, xlen+1, ylen+1);
NewArray(&val, xlen+1, ylen+1);
NewArray(&xtm, minlen, 3);
NewArray(&ytm, minlen, 3);
NewArray(&xt, xlen, 3);
NewArray(&r1, minlen, 3);
NewArray(&r2, minlen, 3);
/***********************/
/* parameter set */
/***********************/
parameter_set4search(xlen, ylen, D0_MIN, Lnorm,
score_d8, d0, d0_search, dcu0);
int simplify_step = 40; //for similified search engine
int score_sum_method = 8; //for scoring method, whether only sum over pairs with dis<score_d8
int i;
int *invmap0 = new int[ylen+1];
int *invmap = new int[ylen+1];
double TM, TMmax=-1;
for(i=0; i<ylen; i++) invmap0[i]=-1;
double ddcc=0.4;
if (Lnorm <= 40) ddcc=0.1; //Lnorm was setted in parameter_set4search
double local_d0_search = d0_search;
//************************************************//
// get initial alignment from user's input: //
// Stick to the initial alignment //
//************************************************//
bool bAlignStick = false;
if (i_opt==3)// if input has set parameter for "-I"
{
// In the original code, this loop starts from 1, which is
// incorrect. Fortran starts from 1 but C++ should starts from 0.
for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
invmap[j] = -1;
int i1 = -1;// in C version, index starts from zero, not from one
int i2 = -1;
int L1 = sequence[0].size();
int L2 = sequence[1].size();
int L = min(L1, L2);// Get positions for aligned residues
for (int kk1 = 0; kk1 < L; kk1++)
{
if (sequence[0][kk1] != '-') i1++;
if (sequence[1][kk1] != '-')
{
i2++;
if (i2 >= ylen || i1 >= xlen) kk1 = L;
else if (sequence[0][kk1] != '-') invmap[i2] = i1;
}
}
//--------------- 2. Align proteins from original alignment
double prevD0_MIN = D0_MIN;// stored for later use
int prevLnorm = Lnorm;
double prevd0 = d0;
TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, d0_search, score_d8,
t, u, mol_type);
D0_MIN = prevD0_MIN;
Lnorm = prevLnorm;
d0 = prevd0;
TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
invmap, t, u, 40, 8, local_d0_search, true, Lnorm, score_d8, d0);
if (TM > TMmax)
{
TMmax = TM;
for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
}
bAlignStick = true;
}
/******************************************************/
/* get initial alignment with gapless threading */
/******************************************************/
if (!bAlignStick)
{
get_initial(r1, r2, xtm, ytm, xa, ya, xlen, ylen, invmap0, d0,
d0_search, fast_opt, t, u);
TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap0,
t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
score_d8, d0);
if (TM>TMmax) TMmax = TM;
if (TMcut>0) copy_t_u(t, u, t0, u0);
//run dynamic programing iteratively to find the best alignment
TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, xlen, ylen,
t, u, invmap, 0, 2, (fast_opt)?2:30, local_d0_search,
D0_MIN, Lnorm, d0, score_d8);
if (TM>TMmax)
{
TMmax = TM;
for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
if (TMcut>0) copy_t_u(t, u, t0, u0);
}
if (TMcut>0) // pre-terminate if TM-score is too low
{
double TMtmp=approx_TM(xlen, ylen, a_opt,
xa, ya, t0, u0, invmap0, mol_type);
if (TMtmp<0.5*TMcut)
{
TM1=TM2=TM3=TM4=TM5=TMtmp;
clean_up_after_approx_TM(invmap0, invmap, score, path, val,
xtm, ytm, xt, r1, r2, xlen, minlen);
return 2;
}
}
/************************************************************/
/* get initial alignment based on secondary structure */
/************************************************************/
get_initial_ss(path, val, secx, secy, xlen, ylen, invmap);
TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
score_d8, d0);
if (TM>TMmax)
{
TMmax = TM;
for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
if (TMcut>0) copy_t_u(t, u, t0, u0);
}
if (TM > TMmax*0.2)
{
TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30,
local_d0_search, D0_MIN, Lnorm, d0, score_d8);
if (TM>TMmax)
{
TMmax = TM;
for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
if (TMcut>0) copy_t_u(t, u, t0, u0);
}
}
if (TMcut>0) // pre-terminate if TM-score is too low
{
double TMtmp=approx_TM(xlen, ylen, a_opt,
xa, ya, t0, u0, invmap0, mol_type);
if (TMtmp<0.52*TMcut)
{
TM1=TM2=TM3=TM4=TM5=TMtmp;
clean_up_after_approx_TM(invmap0, invmap, score, path, val,
xtm, ytm, xt, r1, r2, xlen, minlen);
return 3;
}
}
/************************************************************/
/* get initial alignment based on local superposition */
/************************************************************/
//=initial5 in original TM-align
if (get_initial5( r1, r2, xtm, ytm, path, val, xa, ya,
xlen, ylen, invmap, d0, d0_search, fast_opt, D0_MIN))
{
TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
invmap, t, u, simplify_step, score_sum_method,
local_d0_search, Lnorm, score_d8, d0);
if (TM>TMmax)
{
TMmax = TM;
for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
if (TMcut>0) copy_t_u(t, u, t0, u0);
}
if (TM > TMmax*ddcc)
{
TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
xlen, ylen, t, u, invmap, 0, 2, 2, local_d0_search,
D0_MIN, Lnorm, d0, score_d8);
if (TM>TMmax)
{
TMmax = TM;
for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
if (TMcut>0) copy_t_u(t, u, t0, u0);
}
}
}
else
cerr << "\n\nWarning: initial alignment from local superposition fail!\n\n" << endl;
if (TMcut>0) // pre-terminate if TM-score is too low
{
double TMtmp=approx_TM(xlen, ylen, a_opt,
xa, ya, t0, u0, invmap0, mol_type);
if (TMtmp<0.54*TMcut)
{
TM1=TM2=TM3=TM4=TM5=TMtmp;
clean_up_after_approx_TM(invmap0, invmap, score, path, val,
xtm, ytm, xt, r1, r2, xlen, minlen);
return 4;
}
}
/********************************************************************/
/* get initial alignment by local superposition+secondary structure */
/********************************************************************/
//=initial3 in original TM-align
get_initial_ssplus(r1, r2, score, path, val, secx, secy, xa, ya,
xlen, ylen, invmap0, invmap, D0_MIN, d0);
TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
score_d8, d0);
if (TM>TMmax)
{
TMmax = TM;
for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
if (TMcut>0) copy_t_u(t, u, t0, u0);
}
if (TM > TMmax*ddcc)
{
TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30,
local_d0_search, D0_MIN, Lnorm, d0, score_d8);
if (TM>TMmax)
{
TMmax = TM;
for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
if (TMcut>0) copy_t_u(t, u, t0, u0);
}
}
if (TMcut>0) // pre-terminate if TM-score is too low
{
double TMtmp=approx_TM(xlen, ylen, a_opt,
xa, ya, t0, u0, invmap0, mol_type);
if (TMtmp<0.56*TMcut)
{
TM1=TM2=TM3=TM4=TM5=TMtmp;
clean_up_after_approx_TM(invmap0, invmap, score, path, val,
xtm, ytm, xt, r1, r2, xlen, minlen);
return 5;
}
}
/*******************************************************************/
/* get initial alignment based on fragment gapless threading */
/*******************************************************************/
//=initial4 in original TM-align
get_initial_fgt(r1, r2, xtm, ytm, xa, ya, xlen, ylen,
invmap, d0, d0_search, dcu0, fast_opt, t, u);
TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
score_d8, d0);
if (TM>TMmax)
{
TMmax = TM;
for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
if (TMcut>0) copy_t_u(t, u, t0, u0);
}
if (TM > TMmax*ddcc)
{
TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
xlen, ylen, t, u, invmap, 1, 2, 2, local_d0_search, D0_MIN,
Lnorm, d0, score_d8);
if (TM>TMmax)
{
TMmax = TM;
for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
if (TMcut>0) copy_t_u(t, u, t0, u0);
}
}
if (TMcut>0) // pre-terminate if TM-score is too low
{
double TMtmp=approx_TM(xlen, ylen, a_opt,
xa, ya, t0, u0, invmap0, mol_type);
if (TMtmp<0.58*TMcut)
{
TM1=TM2=TM3=TM4=TM5=TMtmp;
clean_up_after_approx_TM(invmap0, invmap, score, path, val,
xtm, ytm, xt, r1, r2, xlen, minlen);
return 6;
}
}
//************************************************//
// get initial alignment from user's input: //
//************************************************//
if (i_opt==1)// if input has set parameter for "-i"
{
for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
invmap[j] = -1;
int i1 = -1;// in C version, index starts from zero, not from one
int i2 = -1;
int L1 = sequence[0].size();
int L2 = sequence[1].size();
int L = min(L1, L2);// Get positions for aligned residues
for (int kk1 = 0; kk1 < L; kk1++)
{
if (sequence[0][kk1] != '-')
i1++;
if (sequence[1][kk1] != '-')
{
i2++;
if (i2 >= ylen || i1 >= xlen) kk1 = L;
else if (sequence[0][kk1] != '-') invmap[i2] = i1;
}
}
//--------------- 2. Align proteins from original alignment
double prevD0_MIN = D0_MIN;// stored for later use
int prevLnorm = Lnorm;
double prevd0 = d0;
TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya,
xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0,
d0_search, score_d8, t, u, mol_type);
D
gitextract_uly26sb1/
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── analysis/
│ ├── TMalign
│ ├── TMalign.cpp
│ ├── TMscore
│ ├── TMscore.cpp
│ ├── cal_plddt_dir.py
│ ├── cal_tmscore.py
│ ├── motif_analysis.ipynb
│ ├── plddt_calculate.sh
│ ├── plot.ipynb
│ └── uncond_analysis.ipynb
├── configs/
│ ├── callbacks/
│ │ ├── default.yaml
│ │ ├── fixedbb.yaml
│ │ ├── lm.yaml
│ │ └── structok.yaml
│ ├── config.yaml
│ ├── datamodule/
│ │ ├── cath_4.3.yaml
│ │ ├── pdb.yaml
│ │ ├── tokenized_protein.yaml
│ │ ├── uniref50.yaml
│ │ └── uniref50_hf.yaml
│ ├── experiment/
│ │ ├── base.yaml
│ │ ├── dplm/
│ │ │ ├── cond_dplm_150m.yaml
│ │ │ ├── cond_dplm_3b.yaml
│ │ │ ├── cond_dplm_650m.yaml
│ │ │ ├── dplm_150m.yaml
│ │ │ ├── dplm_150m_ds.yaml
│ │ │ ├── dplm_150m_stage2.yaml
│ │ │ ├── dplm_15b_ds.yaml
│ │ │ ├── dplm_30b_ds.yaml
│ │ │ ├── dplm_3b.yaml
│ │ │ ├── dplm_3b_ds.yaml
│ │ │ ├── dplm_3b_stage2.yaml
│ │ │ ├── dplm_650m.yaml
│ │ │ ├── dplm_650m_ds.yaml
│ │ │ ├── dplm_650m_stage2.yaml
│ │ │ └── mlm_150m.yaml
│ │ ├── dplm2/
│ │ │ ├── dplm2_150m.yaml
│ │ │ ├── dplm2_3b.yaml
│ │ │ ├── dplm2_650m.yaml
│ │ │ ├── dplm2_650m_selfmixup.yaml
│ │ │ └── dplm2_bit_650m.yaml
│ │ └── structok/
│ │ ├── inference/
│ │ │ ├── forward_folding.yaml
│ │ │ ├── inverse_folding.yaml
│ │ │ ├── reconstruction.yaml
│ │ │ ├── unconditional.yaml
│ │ │ └── unconditional_codesign.yaml
│ │ └── structok_lfq_8k_pdb_swissprot_c512.yaml
│ ├── hydra/
│ │ └── default.yaml
│ ├── logger/
│ │ ├── tensorboard.yaml
│ │ └── wandb.yaml
│ ├── paths/
│ │ └── default.yaml
│ ├── test.yaml
│ └── trainer/
│ ├── ddp.yaml
│ ├── ddp_bf16.yaml
│ ├── ddp_fp16.yaml
│ ├── deepspeed_zero2.yaml
│ ├── deepspeed_zero2_bf16.yaml
│ ├── deepspeed_zero2_fp16.yaml
│ ├── deepspeed_zero2_offload.yaml
│ ├── deepspeed_zero3.yaml
│ ├── deepspeed_zero3_bf16.yaml
│ └── default.yaml
├── env.yml
├── generate_dplm.py
├── generate_dplm2.py
├── requirements.txt
├── run/
│ ├── scaffold_generate_dplm.py
│ └── scaffold_generate_dplm2.py
├── scripts/
│ ├── download_cath.sh
│ ├── download_metadata.sh
│ ├── download_motif_scaffolds.sh
│ ├── download_pdb_swissprot_hf.sh
│ ├── download_uniref50_hf.sh
│ └── install.sh
├── setup.cfg
├── setup.py
├── src/
│ └── byprot/
│ ├── __init__.py
│ ├── datamodules/
│ │ ├── __init__.py
│ │ ├── cath_datamodule.py
│ │ ├── dataset/
│ │ │ ├── __init__.py
│ │ │ ├── cath.py
│ │ │ ├── data_utils.py
│ │ │ ├── tokenized_protein.py
│ │ │ ├── uniref.py
│ │ │ └── uniref_hf.py
│ │ ├── pdb_dataset/
│ │ │ ├── __init__.py
│ │ │ ├── all_atom.py
│ │ │ ├── pdb_datamodule.py
│ │ │ ├── protein.py
│ │ │ ├── residue_constants.py
│ │ │ └── utils.py
│ │ ├── tokenized_protein_datamodule.py
│ │ ├── uniref50.py
│ │ └── uniref50_hf.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── dplm/
│ │ │ ├── __init__.py
│ │ │ ├── dplm.py
│ │ │ ├── dplm_invfold.py
│ │ │ └── modules/
│ │ │ ├── dplm_adapter.py
│ │ │ ├── dplm_modeling_esm.py
│ │ │ └── gvp_transformer_encoder.py
│ │ ├── dplm2/
│ │ │ ├── __init__.py
│ │ │ ├── dplm2.py
│ │ │ ├── dplm2_bit.py
│ │ │ └── modules/
│ │ │ ├── dplm2_bit_modeling_esm.py
│ │ │ └── dplm2_modeling_esm.py
│ │ ├── structok/
│ │ │ ├── modules/
│ │ │ │ ├── ema.py
│ │ │ │ ├── folding_utils/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── categorical_mixture.py
│ │ │ │ │ ├── decoder.py
│ │ │ │ │ ├── esmfold.py
│ │ │ │ │ ├── misc.py
│ │ │ │ │ ├── pretrained.py
│ │ │ │ │ ├── structure_module.py
│ │ │ │ │ ├── tri_self_attn_block.py
│ │ │ │ │ └── trunk.py
│ │ │ │ ├── gvp_encoder.py
│ │ │ │ ├── lfq.py
│ │ │ │ ├── loss.py
│ │ │ │ ├── nn.py
│ │ │ │ └── vqvae.py
│ │ │ └── structok_lfq.py
│ │ └── utils.py
│ ├── modules/
│ │ ├── __init__.py
│ │ ├── cross_entropy.py
│ │ ├── metrics.py
│ │ └── protein_metrics.py
│ ├── tasks/
│ │ ├── __init__.py
│ │ ├── lm/
│ │ │ ├── dplm.py
│ │ │ ├── dplm2.py
│ │ │ ├── dplm_invfold.py
│ │ │ └── mlm.py
│ │ └── struct_tokenizer/
│ │ └── structok.py
│ ├── testing_pipeline.py
│ ├── training_pipeline.py
│ └── utils/
│ ├── __init__.py
│ ├── callbacks.py
│ ├── config.py
│ ├── io.py
│ ├── logger.py
│ ├── lr_scheduler.py
│ ├── optim.py
│ ├── protein/
│ │ ├── __init__.py
│ │ ├── all_atom.py
│ │ ├── evaluator_dplm2.py
│ │ ├── folding_model.py
│ │ ├── residue_constants.py
│ │ ├── tokenize_pdb.py
│ │ └── utils.py
│ ├── registry.py
│ ├── scaffold_utils.py
│ └── strategies.py
├── test.py
├── train.py
└── vendor/
└── openfold/
├── CITATION.cff
├── Dockerfile
├── LICENSE
├── README.md
├── deepspeed_config.json
├── environment.yml
├── notebooks/
│ ├── OpenFold.ipynb
│ └── environment.yml
├── openfold/
│ ├── __init__.py
│ ├── config.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── data_modules.py
│ │ ├── data_pipeline.py
│ │ ├── data_transforms.py
│ │ ├── errors.py
│ │ ├── feature_pipeline.py
│ │ ├── input_pipeline.py
│ │ ├── mmcif_parsing.py
│ │ ├── parsers.py
│ │ ├── templates.py
│ │ └── tools/
│ │ ├── __init__.py
│ │ ├── hhblits.py
│ │ ├── hhsearch.py
│ │ ├── jackhmmer.py
│ │ ├── kalign.py
│ │ └── utils.py
│ ├── model/
│ │ ├── __init__.py
│ │ ├── dropout.py
│ │ ├── embedders.py
│ │ ├── evoformer.py
│ │ ├── heads.py
│ │ ├── model.py
│ │ ├── msa.py
│ │ ├── outer_product_mean.py
│ │ ├── pair_transition.py
│ │ ├── primitives.py
│ │ ├── structure_module.py
│ │ ├── template.py
│ │ ├── torchscript.py
│ │ ├── triangular_attention.py
│ │ └── triangular_multiplicative_update.py
│ ├── np/
│ │ ├── __init__.py
│ │ ├── protein.py
│ │ ├── relax/
│ │ │ ├── __init__.py
│ │ │ ├── amber_minimize.py
│ │ │ ├── cleanup.py
│ │ │ ├── relax.py
│ │ │ └── utils.py
│ │ └── residue_constants.py
│ ├── resources/
│ │ ├── __init__.py
│ │ └── stereo_chemical_props.txt
│ └── utils/
│ ├── __init__.py
│ ├── argparse.py
│ ├── callbacks.py
│ ├── checkpointing.py
│ ├── chunk_utils.py
│ ├── exponential_moving_average.py
│ ├── feats.py
│ ├── import_weights.py
│ ├── kernel/
│ │ ├── __init__.py
│ │ ├── attention_core.py
│ │ └── csrc/
│ │ ├── compat.h
│ │ ├── softmax_cuda.cpp
│ │ ├── softmax_cuda_kernel.cu
│ │ └── softmax_cuda_stub.cpp
│ ├── logger.py
│ ├── loss.py
│ ├── lr_schedulers.py
│ ├── precision_utils.py
│ ├── rigid_utils.py
│ ├── script_utils.py
│ ├── seed.py
│ ├── superimposition.py
│ ├── suppress_output.py
│ ├── tensor_utils.py
│ ├── trace_utils.py
│ └── validation_metrics.py
├── run_pretrained_openfold.py
├── scripts/
│ ├── activate_conda_env.sh
│ ├── alignment_db_scripts/
│ │ ├── create_alignment_db.py
│ │ └── unify_alignment_db_indices.py
│ ├── build_deepspeed_config.py
│ ├── colabfold_search.sh
│ ├── convert_of_weights_to_jax.py
│ ├── data_dir_to_fasta.py
│ ├── deactivate_conda_env.sh
│ ├── download_alphafold_dbs.sh
│ ├── download_alphafold_params.sh
│ ├── download_bfd.sh
│ ├── download_cameo.py
│ ├── download_colabfold_envdb.sh
│ ├── download_mgnify.sh
│ ├── download_mmseqs_dbs.sh
│ ├── download_openfold_params.sh
│ ├── download_openfold_params_gdrive.sh
│ ├── download_openfold_params_huggingface.sh
│ ├── download_pdb70.sh
│ ├── download_pdb_mmcif.sh
│ ├── download_roda_pdbs.sh
│ ├── download_small_bfd.sh
│ ├── download_uniclust30.sh
│ ├── download_uniref30.sh
│ ├── download_uniref90.sh
│ ├── flatten_roda.sh
│ ├── generate_alphafold_feature_dict.py
│ ├── generate_chain_data_cache.py
│ ├── generate_mmcif_cache.py
│ ├── install_hh_suite.sh
│ ├── install_third_party_dependencies.sh
│ ├── precompute_alignments.py
│ ├── precompute_alignments_mmseqs.py
│ ├── precompute_embeddings.py
│ ├── prep_mmseqs_dbs.sh
│ ├── prep_proteinnet_msas.py
│ ├── run_unit_tests.sh
│ ├── slurm_scripts/
│ │ └── run_uniclust30_search.sh
│ ├── unpack_proteinnet.py
│ ├── utils.py
│ ├── vars.sh
│ └── zero_to_fp32.py
├── setup.py
├── tests/
│ ├── __init__.py
│ ├── compare_utils.py
│ ├── config.py
│ ├── data_utils.py
│ ├── test_data/
│ │ ├── alignments/
│ │ │ ├── bfd_uniclust_hits.a3m
│ │ │ ├── mgnify_hits.sto
│ │ │ ├── pdb70_hits.hhr
│ │ │ └── uniref90_hits.sto
│ │ ├── alphafold_feature_dict.pickle
│ │ ├── features.pkl
│ │ ├── mmcifs/
│ │ │ ├── 1hf9.cif
│ │ │ ├── 1psm.cif
│ │ │ ├── 2crb.cif
│ │ │ ├── 2q2k.cif
│ │ │ ├── 3u8v.cif
│ │ │ ├── 3zee.cif
│ │ │ ├── 4i6p.cif
│ │ │ ├── 4zey.cif
│ │ │ └── 5kc1.cif
│ │ └── short.fasta
│ ├── test_data_pipeline.py
│ ├── test_data_transforms.py
│ ├── test_embedders.py
│ ├── test_evoformer.py
│ ├── test_feats.py
│ ├── test_import_weights.py
│ ├── test_kernels.py
│ ├── test_loss.py
│ ├── test_model.py
│ ├── test_msa.py
│ ├── test_outer_product_mean.py
│ ├── test_pair_transition.py
│ ├── test_primitives.py
│ ├── test_structure_module.py
│ ├── test_template.py
│ ├── test_triangular_attention.py
│ ├── test_triangular_multiplicative_update.py
│ └── test_utils.py
├── thread_sequence.py
└── train_openfold.py
SYMBOL INDEX (1958 symbols across 175 files)
FILE: analysis/TMalign.cpp
function print_version (line 86) | void print_version()
function print_extra_help (line 98) | void print_extra_help()
function print_help (line 176) | void print_help(bool h_opt=false)
function PrintErrorAndQuit (line 245) | void PrintErrorAndQuit(const string sErrorString)
function T (line 251) | inline T getmin(const T &a, const T &b)
function NewArray (line 256) | void NewArray(A *** array, int Narray1, int Narray2)
function DeleteArray (line 262) | void DeleteArray(A *** array, int Narray)
function string (line 270) | string AAmap(char A)
function AAmap (line 299) | char AAmap(const string &AA)
function split (line 333) | void split(const string &line, vector<string> &line_vec,
function string (line 354) | string Trim(const string &inputString)
function split_white (line 370) | void split_white(const string &line, vector<string> &line_vec,
function get_PDB_lines (line 397) | size_t get_PDB_lines(const string filename,
function get_FASTA_lines (line 738) | size_t get_FASTA_lines(const string filename,
function extract_aln_from_resi (line 788) | int extract_aln_from_resi(vector<string> &sequence, char *seqx, char *seqy,
function read_PDB (line 854) | int read_PDB(const vector<string> &PDB_lines, double **a, char *seq,
function dist (line 873) | double dist(double x[3], double y[3])
function dot (line 882) | double dot(double *a, double *b)
function transform (line 887) | void transform(double t[3], double u[3][3], double *x, double *x1)
function do_rotation (line 894) | void do_rotation(double **x, double **x1, int len, double t[3], double u...
function read_user_alignment (line 905) | void read_user_alignment(vector<string>&sequence, const string &fname_lign,
function file2chainlist (line 953) | void file2chainlist(vector<string>&chain_list, const string &name,
function Kabsch (line 983) | bool Kabsch(double **x, double **y, int n, int mode, double *rms,
function NWDP_TM (line 1321) | void NWDP_TM(double **score, bool **path, double **val,
function NWDP_TM (line 1403) | void NWDP_TM(bool **path, double **val, double **x, double **y,
function NWDP_SE (line 1490) | void NWDP_SE(bool **path, double **val, double **x, double **y,
function NWDP_TM (line 1570) | void NWDP_TM(bool **path, double **val, const char *secx, const char *secy,
function parameter_set4search (line 1647) | void parameter_set4search(const int xlen, const int ylen,
function parameter_set4final_C3prime (line 1669) | void parameter_set4final_C3prime(const double len, double &D0_MIN,
function parameter_set4final (line 1687) | void parameter_set4final(const double len, double &D0_MIN, double &Lnorm,
function parameter_set4scale (line 1707) | void parameter_set4scale(const int len, const double d_s, double &Lnorm,
function score_fun8 (line 1719) | int score_fun8( double **xa, double **ya, int n_ali, double d, int i_ali[],
function score_fun8_standard (line 1762) | int score_fun8_standard(double **xa, double **ya, int n_ali, double d,
function TMscore8_search (line 1807) | double TMscore8_search(double **r1, double **r2, double **xtm, double **...
function TMscore8_search_standard (line 1962) | double TMscore8_search_standard( double **r1, double **r2,
function detailed_search (line 2122) | double detailed_search(double **r1, double **r2, double **xtm, double **...
function detailed_search_standard (line 2156) | double detailed_search_standard( double **r1, double **r2,
function get_score_fast (line 2194) | double get_score_fast( double **r1, double **r2, double **xtm, double **...
function get_initial (line 2341) | double get_initial(double **r1, double **r2, double **xtm, double **ytm,
function smooth (line 2392) | void smooth(int *sec, int len)
function sec_str (line 2436) | char sec_str(double dis13, double dis14, double dis15,
function make_sec (line 2466) | void make_sec(double **x, int len, char *sec)
function get_initial_ss (line 2499) | void get_initial_ss(bool **path, double **val,
function get_initial5 (line 2514) | bool get_initial5( double **r1, double **r2, double **xtm, double **ytm,
function score_matrix_rmsd_sec (line 2613) | void score_matrix_rmsd_sec( double **r1, double **r2, double **score,
function get_initial_ssplus (line 2665) | void get_initial_ssplus(double **r1, double **r2, double **score, bool *...
function find_max_frag (line 2678) | void find_max_frag(double **x, int len, int *start_max,
function get_initial_fgt (line 2744) | double get_initial_fgt(double **r1, double **r2, double **xtm, double **...
function DP_iter (line 2979) | double DP_iter(double **r1, double **r2, double **xtm, double **ytm,
function output_superpose (line 3047) | void output_superpose(const string xname, const string yname,
function output_rotation_matrix (line 3666) | void output_rotation_matrix(const char* fname_matrix,
function output_results (line 3696) | void output_results(
function standard_TMscore (line 3787) | double standard_TMscore(double **r1, double **r2, double **xtm, double *...
function copy_t_u (line 3858) | void copy_t_u(double t[3], double u[3][3], double t0[3], double u0[3][3])
function approx_TM (line 3869) | double approx_TM(const int xlen, const int ylen, const int a_opt,
function clean_up_after_approx_TM (line 3902) | void clean_up_after_approx_TM(int *invmap0, int *invmap,
function TMalign_main (line 3923) | int TMalign_main(double **xa, double **ya,
function CPalign_main (line 4528) | int CPalign_main(double **xa, double **ya,
function main (line 4686) | int main(int argc, char *argv[])
FILE: analysis/TMscore.cpp
function print_version (line 72) | void print_version()
function print_extra_help (line 88) | void print_extra_help()
function print_help (line 159) | void print_help(bool h_opt=false)
type redi (line 268) | namespace redi
type pstreams (line 271) | struct pstreams
class basic_pstreambuf (line 296) | class basic_pstreambuf
type buf_read_src (line 409) | enum buf_read_src { rsrc_out = 0, rsrc_err = 1 }
class pstream_common (line 472) | class pstream_common
class basic_ipstream (line 543) | class basic_ipstream
method pmode (line 554) | pmode readable(pmode mode)
method basic_ipstream (line 569) | basic_ipstream()
method basic_ipstream (line 583) | explicit
method basic_ipstream (line 599) | basic_ipstream( const std::string& file,
method basic_ipstream (line 615) | explicit
method basic_ipstream (line 622) | explicit
method open (line 645) | void
method open (line 661) | void
method basic_ipstream (line 673) | basic_ipstream&
method basic_ipstream (line 684) | basic_ipstream&
class basic_opstream (line 703) | class basic_opstream
method basic_opstream (line 721) | basic_opstream()
method basic_opstream (line 735) | explicit
method basic_opstream (line 751) | basic_opstream( const std::string& file,
method basic_opstream (line 767) | explicit
method basic_opstream (line 781) | explicit
method open (line 803) | void
method open (line 819) | void
class basic_pstream (line 843) | class basic_pstream
method basic_pstream (line 861) | basic_pstream()
method basic_pstream (line 875) | explicit
method basic_pstream (line 891) | basic_pstream( const std::string& file,
method basic_pstream (line 907) | explicit
method basic_pstream (line 921) | explicit
method open (line 943) | void
method open (line 959) | void
method basic_pstream (line 971) | basic_pstream&
method basic_pstream (line 982) | basic_pstream&
class basic_rpstream (line 1013) | class basic_rpstream
method basic_rpstream (line 1033) | basic_rpstream()
method basic_rpstream (line 1047) | explicit
method basic_rpstream (line 1063) | basic_rpstream( const std::string& file,
method basic_rpstream (line 1079) | explicit
method basic_rpstream (line 1094) | explicit
method open (line 1112) | void
method open (line 1128) | void
method istream_type (line 1141) | istream_type&
method istream_type (line 1153) | istream_type&
function close_fd (line 1354) | inline void
function close_fd_array (line 1372) | inline void
function pid_t (line 1515) | pid_t
function PrintErrorAndQuit (line 2462) | void PrintErrorAndQuit(const string sErrorString)
function T (line 2468) | inline T getmin(const T &a, const T &b)
function NewArray (line 2473) | void NewArray(A *** array, int Narray1, int Narray2)
function DeleteArray (line 2479) | void DeleteArray(A *** array, int Narray)
function string (line 2487) | string AAmap(char A)
function AAmap (line 2517) | char AAmap(const string &AA)
function split (line 2554) | void split(const string &line, vector<string> &line_vec,
function string (line 2575) | string Trim(const string &inputString)
function get_PDB_lines (line 2584) | size_t get_PDB_lines(const string filename,
function read_PDB (line 3002) | int read_PDB(const vector<string> &PDB_lines, double **a, char *seq,
function dist (line 3021) | double dist(double x[3], double y[3])
function dot (line 3030) | double dot(double *a, double *b)
function transform (line 3035) | void transform(double t[3], double u[3][3], double *x, double *x1)
function do_rotation (line 3042) | void do_rotation(double **x, double **x1, int len, double t[3], double u...
function file2chainlist (line 3056) | void file2chainlist(vector<string>&chain_list, const string &name,
function parameter_set4search (line 3079) | void parameter_set4search(const int xlen, const int ylen,
function parameter_set4final_C3prime (line 3101) | void parameter_set4final_C3prime(const double len, double &D0_MIN,
function parameter_set4final (line 3119) | void parameter_set4final(const double len, double &D0_MIN, double &Lnorm,
function parameter_set4scale (line 3139) | void parameter_set4scale(const int len, const double d_s, double &Lnorm,
function Kabsch (line 3162) | bool Kabsch(double **x, double **y, int n, int mode, double *rms,
function init_gotoh_mat (line 3632) | void init_gotoh_mat(int **S, int **JumpH, int **JumpV, int **P,
function find_highest_align_score (line 3669) | void find_highest_align_score( int **S, int **P,
function calculate_score_gotoh (line 3716) | int calculate_score_gotoh(const int xlen,const int ylen, int **S,
function trace_back_gotoh (line 3806) | void trace_back_gotoh(const char *seqx, const char *seqy,
function trace_back_sw (line 3882) | void trace_back_sw(const char *seqx, const char *seqy,
function NWalign_main (line 3995) | int NWalign_main(const char *seqx, const char *seqy, const int xlen,
function extract_aln_from_resi (line 4053) | int extract_aln_from_resi(vector<string> &sequence, char *seqx, char *seqy,
function score_fun8 (line 4189) | int score_fun8( double **xa, double **ya, int n_ali, double d, int i_ali[],
function score_fun8_standard (line 4232) | int score_fun8_standard(double **xa, double **ya, int n_ali, double d,
function TMscore8_search (line 4277) | double TMscore8_search(double **r1, double **r2, double **xtm, double **...
function TMscore8_search_standard (line 4432) | double TMscore8_search_standard( double **r1, double **r2,
function detailed_search_standard (line 4585) | double detailed_search_standard( double **r1, double **r2,
function smooth (line 4622) | void smooth(int *sec, int len)
function output_pymol (line 4666) | void output_pymol(const string xname, const string yname,
function output_rasmol (line 4992) | void output_rasmol(const string xname, const string yname,
function output_rotation_matrix (line 5648) | void output_rotation_matrix(const char* fname_matrix,
function standard_TMscore (line 5677) | double standard_TMscore(double **r1, double **r2, double **xtm, double *...
function approx_TM (line 5748) | double approx_TM(const int xlen, const int ylen, const int a_opt,
function clean_up_after_approx_TM (line 5781) | void clean_up_after_approx_TM(int *invmap0, int *invmap,
function score_fun8 (line 5798) | int score_fun8( double **xa, double **ya, int n_ali, double d, int i_ali[],
function score_fun8_standard (line 5870) | int score_fun8_standard(double **xa, double **ya, int n_ali, double d,
function TMscore8_search (line 5942) | double TMscore8_search(double **r1, double **r2, double **xtm, double **...
function TMscore8_search_standard (line 6108) | double TMscore8_search_standard( double **r1, double **r2,
function detailed_search_standard (line 6272) | double detailed_search_standard( double **r1, double **r2,
function TMscore_main (line 6315) | int TMscore_main(double **xa, double **ya,
function output_TMscore_results (line 6638) | void output_TMscore_results(
function main (line 6762) | int main(int argc, char *argv[])
FILE: analysis/cal_plddt_dir.py
function read_fasta (line 49) | def read_fasta(
function read_alignment_lines (line 65) | def read_alignment_lines(
function enable_cpu_offloading (line 95) | def enable_cpu_offloading(model):
function init_model_on_gpu_with_cpu_offloading (line 117) | def init_model_on_gpu_with_cpu_offloading(model):
function create_batched_sequence_datasest (line 126) | def create_batched_sequence_datasest(
function create_parser (line 142) | def create_parser():
function run (line 198) | def run(args):
function main (line 285) | def main():
FILE: analysis/cal_tmscore.py
function run_tmalign (line 18) | def run_tmalign(query, reference, fast=True):
function tm_one2refs (line 44) | def tm_one2refs(
function tm_set2set (line 60) | def tm_set2set(querys, targets, save_path, n_threads=mp.cpu_count()):
function main (line 78) | def main():
function cal_novelty (line 111) | def cal_novelty(query_dir, reference_dir):
function cal_diversity (line 133) | def cal_diversity(query_dir, reference_dir):
FILE: generate_dplm.py
function format_check (line 11) | def format_check(args):
function initialize_generation (line 42) | def initialize_generation(
function generate (line 73) | def generate(args):
function main (line 118) | def main():
FILE: generate_dplm2.py
function initialize_conditional_generation (line 16) | def initialize_conditional_generation(
function initialize_generation (line 136) | def initialize_generation(
function unconditional_generate (line 199) | def unconditional_generate(args):
function conditional_generate_from_fasta (line 291) | def conditional_generate_from_fasta(args):
function save_fasta (line 331) | def save_fasta(
function save_results (line 354) | def save_results(
function main (line 462) | def main():
FILE: run/scaffold_generate_dplm.py
function generate (line 19) | def generate(args, saveto):
function main (line 88) | def main():
FILE: run/scaffold_generate_dplm2.py
function generate (line 17) | def generate(args, saveto):
function save_results (line 130) | def save_results(
function main (line 188) | def main():
FILE: src/byprot/datamodules/__init__.py
function register_datamodule (line 16) | def register_datamodule(name):
FILE: src/byprot/datamodules/cath_datamodule.py
class CATHDataModule (line 25) | class CATHDataModule(LightningDataModule):
method __init__ (line 26) | def __init__(
method setup (line 55) | def setup(self, stage: Optional[str] = None):
method _build_batch_sampler (line 92) | def _build_batch_sampler(
method train_dataloader (line 109) | def train_dataloader(self):
method val_dataloader (line 124) | def val_dataloader(self):
method test_dataloader (line 137) | def test_dataloader(self):
FILE: src/byprot/datamodules/dataset/cath.py
function CATH (line 24) | def CATH(
function collate_batch (line 143) | def collate_batch(
class CoordBatchConverter (line 190) | class CoordBatchConverter(esm.data.BatchConverter):
method __init__ (line 191) | def __init__(
method __call__ (line 203) | def __call__(self, raw_batch: Sequence[Tuple[Sequence, str]], device=N...
method from_lists (line 269) | def from_lists(
method collate_dense_tensors (line 299) | def collate_dense_tensors(samples, pad_v):
function new_arange (line 327) | def new_arange(x, *size):
class ToSabdabDataFormat (line 338) | class ToSabdabDataFormat(object):
method __init__ (line 339) | def __init__(self, alphabet) -> None:
method _map_aatypes (line 352) | def _map_aatypes(self, tokens):
method __call__ (line 359) | def __call__(self, batch_data) -> Any:
function ToPiFoldFormat (line 390) | def ToPiFoldFormat(X, S, cfd, pad_special_tokens=False):
class Featurizer (line 410) | class Featurizer(object):
method __init__ (line 411) | def __init__(
method __call__ (line 428) | def __call__(self, raw_batch: dict):
FILE: src/byprot/datamodules/dataset/data_utils.py
class Alphabet (line 28) | class Alphabet(object):
method __init__ (line 29) | def __init__(
method __getattr__ (line 76) | def __getattr__(self, name: str) -> Any:
method __len__ (line 84) | def __len__(self):
method get_featurizer (line 87) | def get_featurizer(self, name="cath", **kwds):
method featurizer (line 102) | def featurizer(self):
method featurize (line 105) | def featurize(self, raw_batch, **kwds):
method decode (line 108) | def decode(self, batch_ids, return_as="str", remove_special=False):
class PDBDataProcessor (line 127) | class PDBDataProcessor(object):
method parse_PDB (line 128) | def parse_PDB(
method parse_PDB_biounits (line 260) | def parse_PDB_biounits(self, x, atoms=["N", "CA", "C"], chain=None):
function identity (line 379) | def identity(example):
class MaxTokensBatchSampler (line 383) | class MaxTokensBatchSampler(BatchSampler):
method __init__ (line 384) | def __init__(
method __len__ (line 417) | def __len__(self):
method __iter__ (line 420) | def __iter__(self) -> Iterator[DataChunk[T_co]]:
method _build_batches (line 424) | def _build_batches(self):
method set_epoch (line 488) | def set_epoch(self, epoch):
FILE: src/byprot/datamodules/dataset/tokenized_protein.py
function load_vocab_file (line 22) | def load_vocab_file(vocab_file):
function preprocess_dataset (line 28) | def preprocess_dataset(csv_path, data_bin, split):
class SortishSampler (line 98) | class SortishSampler(Sampler):
method __init__ (line 102) | def __init__(
method __iter__ (line 128) | def __iter__(self):
method __len__ (line 143) | def __len__(self):
method set_epoch (line 146) | def set_epoch(self, epoch):
class ApproxBatchSampler (line 150) | class ApproxBatchSampler(BatchSampler):
method __init__ (line 167) | def __init__(
method _build_batches (line 189) | def _build_batches(self):
method __len__ (line 235) | def __len__(self):
method __iter__ (line 238) | def __iter__(self):
class TokenizedProteinDataset (line 243) | class TokenizedProteinDataset(Dataset):
method __init__ (line 252) | def __init__(
method __len__ (line 276) | def __len__(self):
method get_metadata_lens (line 279) | def get_metadata_lens(self):
method __getitem__ (line 282) | def __getitem__(self, idx):
class Subset (line 323) | class Subset(Dataset[T_co]):
method __init__ (line 335) | def __init__(self, dataset: Dataset[T_co], indices: Sequence[int]) -> ...
method __getitem__ (line 339) | def __getitem__(self, idx):
method __len__ (line 344) | def __len__(self):
class DPLM2Tokenizer (line 348) | class DPLM2Tokenizer(EsmTokenizer):
method __init__ (line 361) | def __init__(
method aa_eos_token (line 409) | def aa_eos_token(self) -> str:
method aa_cls_token (line 420) | def aa_cls_token(self) -> str:
method aa_unk_token (line 431) | def aa_unk_token(self) -> str:
method aa_mask_token (line 442) | def aa_mask_token(self) -> str:
method struct_eos_token (line 453) | def struct_eos_token(self) -> str:
method struct_cls_token (line 464) | def struct_cls_token(self) -> str:
method struct_unk_token (line 475) | def struct_unk_token(self) -> str:
method struct_mask_token (line 486) | def struct_mask_token(self) -> str:
method aa_cls_token (line 497) | def aa_cls_token(self, value):
method aa_eos_token (line 505) | def aa_eos_token(self, value):
method aa_unk_token (line 513) | def aa_unk_token(self, value):
method aa_mask_token (line 521) | def aa_mask_token(self, value):
method struct_cls_token (line 529) | def struct_cls_token(self, value):
method struct_eos_token (line 537) | def struct_eos_token(self, value):
method struct_unk_token (line 545) | def struct_unk_token(self, value):
method struct_mask_token (line 553) | def struct_mask_token(self, value):
class DPLM2Collater (line 561) | class DPLM2Collater(object):
method __init__ (line 562) | def __init__(self, tokenizer):
method __call__ (line 567) | def __call__(self, raw_batch):
function setup_dataloader (line 610) | def setup_dataloader(
function load_dataset_from_hf (line 643) | def load_dataset_from_hf(data_path, split):
FILE: src/byprot/datamodules/dataset/uniref.py
class SortishSampler (line 32) | class SortishSampler(Sampler):
method __init__ (line 36) | def __init__(
method __iter__ (line 61) | def __iter__(self):
method __len__ (line 76) | def __len__(self):
method set_epoch (line 79) | def set_epoch(self, epoch):
class ApproxBatchSampler (line 83) | class ApproxBatchSampler(BatchSampler):
method __init__ (line 100) | def __init__(
method _build_batches (line 121) | def _build_batches(self):
method __len__ (line 176) | def __len__(self):
method __iter__ (line 179) | def __iter__(self):
class UniRefDataset (line 184) | class UniRefDataset(Dataset):
method __init__ (line 193) | def __init__(
method __len__ (line 211) | def __len__(self):
method get_metadata_lens (line 214) | def get_metadata_lens(self):
method __getitem__ (line 217) | def __getitem__(self, idx):
class Subset (line 233) | class Subset(Dataset[T_co]):
method __init__ (line 244) | def __init__(self, dataset: Dataset[T_co], indices: Sequence[int]) -> ...
method __getitem__ (line 248) | def __getitem__(self, idx):
method __len__ (line 253) | def __len__(self):
class DPLMCollater (line 257) | class DPLMCollater(object):
method __init__ (line 258) | def __init__(self, tokenizer_path=None):
method __call__ (line 269) | def __call__(self, sequences):
function setup_dataloader (line 289) | def setup_dataloader(
FILE: src/byprot/datamodules/dataset/uniref_hf.py
class SortishSampler (line 21) | class SortishSampler(Sampler):
method __init__ (line 25) | def __init__(
method __iter__ (line 50) | def __iter__(self):
method __len__ (line 65) | def __len__(self):
method set_epoch (line 68) | def set_epoch(self, epoch):
class ApproxBatchSampler (line 72) | class ApproxBatchSampler(BatchSampler):
method __init__ (line 89) | def __init__(
method _build_batches (line 110) | def _build_batches(self):
method __len__ (line 165) | def __len__(self):
method __iter__ (line 168) | def __iter__(self):
class UniRefHFDataset (line 173) | class UniRefHFDataset(Dataset):
method __init__ (line 182) | def __init__(
method __len__ (line 194) | def __len__(self):
method get_metadata_lens (line 197) | def get_metadata_lens(self):
method __getitem__ (line 200) | def __getitem__(self, idx):
class UniRefDatasetForTesting (line 213) | class UniRefDatasetForTesting(Dataset):
method __init__ (line 214) | def __init__(
method __len__ (line 230) | def __len__(self):
method get_metadata_lens (line 233) | def get_metadata_lens(self):
method __getitem__ (line 236) | def __getitem__(self, idx):
class Subset (line 241) | class Subset(Dataset[T_co]):
method __init__ (line 252) | def __init__(self, dataset: Dataset[T_co], indices: Sequence[int]) -> ...
method __getitem__ (line 256) | def __getitem__(self, idx):
method __len__ (line 261) | def __len__(self):
class DPLMCollater (line 265) | class DPLMCollater(object):
method __init__ (line 269) | def __init__(self, tokenizer_path=None):
method __call__ (line 280) | def __call__(self, sequences):
function setup_dataloader (line 300) | def setup_dataloader(
function load_dataset_from_hf (line 331) | def load_dataset_from_hf(data_path, split):
FILE: src/byprot/datamodules/pdb_dataset/all_atom.py
function to_atom37 (line 44) | def to_atom37(trans, rots):
function torsion_angles_to_frames (line 53) | def torsion_angles_to_frames(
function prot_to_torsion_angles (line 129) | def prot_to_torsion_angles(aatype, atom37, atom37_mask):
function frames_to_atom14_pos (line 144) | def frames_to_atom14_pos(
function compute_backbone (line 180) | def compute_backbone(bb_rigids, psi_torsions):
function calculate_neighbor_angles (line 205) | def calculate_neighbor_angles(R_ac, R_ab):
function vector_projection (line 230) | def vector_projection(R_ab, P_n):
function transrot_to_atom37 (line 250) | def transrot_to_atom37(transrot_traj, res_mask):
function atom37_from_trans_rot (line 270) | def atom37_from_trans_rot(trans, rots, res_mask):
function process_trans_rot_traj (line 284) | def process_trans_rot_traj(trans_traj, rots_traj, res_mask):
function adjust_oxygen_pos (line 294) | def adjust_oxygen_pos(
FILE: src/byprot/datamodules/pdb_dataset/pdb_datamodule.py
function load_from_pdb (line 44) | def load_from_pdb(pdb_path, batch=False):
function collate_fn (line 51) | def collate_fn(batch: list):
function exists (line 61) | def exists(o):
class PdbDataModule (line 66) | class PdbDataModule(LightningDataModule):
method __init__ (line 67) | def __init__(self, data_cfg):
method setup (line 77) | def setup(self, stage: str):
method train_dataloader (line 89) | def train_dataloader(self, rank=None, num_replicas=None):
method _build_batch_sampler (line 153) | def _build_batch_sampler(
method val_dataloader (line 178) | def val_dataloader(self):
class PdbDataset (line 189) | class PdbDataset(Dataset):
method __init__ (line 190) | def __init__(
method is_training (line 208) | def is_training(self):
method dataset_cfg (line 212) | def dataset_cfg(self):
method _init_metadata (line 215) | def _init_metadata(self):
method sample_cluster (line 335) | def sample_cluster(self, pdb_csv, seed):
method _process_csv_row2 (line 338) | def _process_csv_row2(self, processed_file_path):
method process_chain (line 387) | def process_chain(chain_feats: dict, random_crop=False, crop_size=256):
method __len__ (line 510) | def __len__(self):
method __getitem__ (line 513) | def __getitem__(self, idx):
method _process_csv_row (line 522) | def _process_csv_row(self, csv_row):
class LengthBatcher (line 573) | class LengthBatcher:
method __init__ (line 574) | def __init__(
method _replica_epoch_batches (line 610) | def _replica_epoch_batches(self):
method _create_batches (line 650) | def _create_batches(self):
method __iter__ (line 664) | def __iter__(self):
method __len__ (line 669) | def __len__(self):
function _rog_quantile_curve (line 677) | def _rog_quantile_curve(df, quantile, eval_x):
FILE: src/byprot/datamodules/pdb_dataset/protein.py
class Protein (line 37) | class Protein:
method __post_init__ (line 64) | def __post_init__(self):
function from_pdb_string (line 72) | def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Pro...
function _chain_end (line 154) | def _chain_end(atom_index, end_resname, chain_name, residue_index) -> str:
function to_pdb (line 162) | def to_pdb(prot: Protein, model=1, add_end=True) -> str:
function ideal_atom_mask (line 260) | def ideal_atom_mask(prot: Protein) -> np.ndarray:
function from_prediction (line 276) | def from_prediction(
FILE: src/byprot/datamodules/pdb_dataset/residue_constants.py
function load_stereo_chemical_props (line 438) | def load_stereo_chemical_props() -> Tuple[
function sequence_to_onehot (line 887) | def sequence_to_onehot(
function _make_standard_atom_mask (line 1040) | def _make_standard_atom_mask() -> np.ndarray:
function chi_angle_atom (line 1058) | def chi_angle_atom(atom_index: int) -> np.ndarray:
function _make_rigid_transformation_4x4 (line 1105) | def _make_rigid_transformation_4x4(ex, ey, translation):
function _make_rigid_group_constants (line 1136) | def _make_rigid_group_constants():
function make_atom14_dists_bounds (line 1221) | def make_atom14_dists_bounds(
FILE: src/byprot/datamodules/pdb_dataset/utils.py
function pad_feats (line 87) | def pad_feats(raw_feats, max_len, use_torch=False):
function pad_rigid (line 110) | def pad_rigid(rigid: torch.tensor, max_len: int):
function pad (line 119) | def pad(x: np.ndarray, max_len: int, dim=0, use_torch=False, reverse=Fal...
class DataError (line 151) | class DataError(Exception):
class FileExistsError (line 157) | class FileExistsError(DataError):
class MmcifParsingError (line 163) | class MmcifParsingError(DataError):
class ResolutionError (line 169) | class ResolutionError(DataError):
class LengthError (line 175) | class LengthError(DataError):
class CPU_Unpickler (line 181) | class CPU_Unpickler(pickle.Unpickler):
method find_class (line 187) | def find_class(self, module, name):
function create_rigid (line 194) | def create_rigid(rots, trans):
function batch_align_structures (line 199) | def batch_align_structures(pos_1, pos_2, mask=None):
function adjust_oxygen_pos (line 231) | def adjust_oxygen_pos(
function write_pkl (line 334) | def write_pkl(
function read_pkl (line 349) | def read_pkl(read_path: str, verbose=True, use_torch=False, map_location...
function chain_str_to_int (line 369) | def chain_str_to_int(chain_str: str):
function parse_chain_feats (line 378) | def parse_chain_feats(chain_feats, scale_factor=1.0):
function concat_np_features (line 400) | def concat_np_features(
function center_zero (line 425) | def center_zero(
function align_structures (line 446) | def align_structures(
function process_mmcif (line 531) | def process_mmcif(mmcif_path: str, max_resolution: int, max_len: int):
function process_pdb_file (line 643) | def process_pdb_file(file_path: str):
function parse_pdb_feats (line 727) | def parse_pdb_feats(
function process_chain (line 766) | def process_chain(chain: Chain, chain_id: str) -> Protein:
FILE: src/byprot/datamodules/tokenized_protein_datamodule.py
class TokenizedProteinDataModule (line 33) | class TokenizedProteinDataModule(LightningDataModule):
method __init__ (line 34) | def __init__(
method setup (line 55) | def setup(self, stage: Optional[str] = None, split: Optional[str] = No...
method train_dataloader (line 97) | def train_dataloader(self):
method val_dataloader (line 131) | def val_dataloader(self):
method test_dataloader (line 140) | def test_dataloader(self):
function length_cropping (line 151) | def length_cropping(dataset_pandas, epoch, min_crop_length=60):
function sample_cluster (line 167) | def sample_cluster(dataset_pandas, epoch):
FILE: src/byprot/datamodules/uniref50.py
class UniRefDataModule (line 34) | class UniRefDataModule(LightningDataModule):
method __init__ (line 35) | def __init__(
method setup (line 58) | def setup(self, stage: Optional[str] = None):
method train_dataloader (line 102) | def train_dataloader(self):
method val_dataloader (line 114) | def val_dataloader(self):
method test_dataloader (line 124) | def test_dataloader(self):
FILE: src/byprot/datamodules/uniref50_hf.py
class UniRefHFDataModule (line 31) | class UniRefHFDataModule(LightningDataModule):
method __init__ (line 32) | def __init__(
method setup (line 51) | def setup(self, stage: Optional[str] = None):
method train_dataloader (line 83) | def train_dataloader(self):
method val_dataloader (line 94) | def val_dataloader(self):
method test_dataloader (line 102) | def test_dataloader(self):
FILE: src/byprot/models/__init__.py
function register_model (line 16) | def register_model(name):
FILE: src/byprot/models/dplm/dplm.py
class DPLMConfig (line 30) | class DPLMConfig:
class DiffusionProteinLanguageModel (line 39) | class DiffusionProteinLanguageModel(nn.Module):
method __init__ (line 42) | def __init__(self, cfg, net=None):
method from_pretrained (line 60) | def from_pretrained(
method _update_cfg (line 106) | def _update_cfg(self, cfg):
method q_sample_coupled (line 109) | def q_sample_coupled(self, x_0, t1, t2, maskable_mask):
method q_sample (line 135) | def q_sample(self, x_0, t1, maskable_mask):
method forward (line 150) | def forward(self, input_ids, return_last_hidden_state=False, **kwargs):
method compute_loss (line 161) | def compute_loss(self, batch, weighting="constant"):
method forward_encoder (line 205) | def forward_encoder(self, input_tokens, **kwargs):
method initialize_output_tokens (line 208) | def initialize_output_tokens(self, input_tokens, partial_masks=None, *...
method resample (line 220) | def resample(self, _tokens, _scores, ratio, scale):
method forward_decoder (line 304) | def forward_decoder(
method get_non_special_symbol_mask (line 377) | def get_non_special_symbol_mask(self, output_tokens, partial_masks=None):
method _reparam_decoding (line 387) | def _reparam_decoding(
method generate (line 503) | def generate(
FILE: src/byprot/models/dplm/dplm_invfold.py
class GVPTransEncoderConfig (line 27) | class GVPTransEncoderConfig:
class DPLMInvFoldConfig (line 33) | class DPLMInvFoldConfig:
class DPLMInvFold (line 40) | class DPLMInvFold(nn.Module):
method __init__ (line 43) | def __init__(self, cfg) -> None:
method _update_cfg (line 61) | def _update_cfg(self, cfg):
method from_pretrained (line 67) | def from_pretrained(cls, net_name, cfg_override={}, net_override={}):
method forward (line 102) | def forward(
method forward_encoder (line 147) | def forward_encoder(self, batch, use_draft_seq=False):
method get_non_special_sym_mask (line 173) | def get_non_special_sym_mask(self, output_tokens, partial_masks=None):
method forward_decoder (line 183) | def forward_decoder(
method initialize_output_tokens (line 242) | def initialize_output_tokens(
method _reparam_decoding (line 283) | def _reparam_decoding(
method generate (line 399) | def generate(
FILE: src/byprot/models/dplm/modules/dplm_adapter.py
class DPLMWithAdapterConfig (line 29) | class DPLMWithAdapterConfig:
class DPLMWithConditionalAdatper (line 37) | class DPLMWithConditionalAdatper(nn.Module):
method from_pretrained (line 41) | def from_pretrained(cls, cfg):
method __init__ (line 59) | def __init__(self, cfg, net=None):
method forward (line 72) | def forward(
method compute_loss (line 95) | def compute_loss(
method _update_cfg (line 146) | def _update_cfg(self, cfg):
method q_sample_coupled (line 151) | def q_sample_coupled(self, x_0, t1, t2, maskable_mask):
method get_non_special_sym_mask (line 178) | def get_non_special_sym_mask(self, output_tokens, partial_masks=None):
class AdapterLayer (line 189) | class AdapterLayer(nn.Module):
method __init__ (line 190) | def __init__(self, cfg, config):
method forward (line 213) | def forward(
method feed_forward_chunk (line 276) | def feed_forward_chunk(self, attention_output):
method adapter_feed_forward_chunk (line 282) | def adapter_feed_forward_chunk(self, attention_output):
class ModifiedEsmSelfAttention (line 291) | class ModifiedEsmSelfAttention(EsmSelfAttention):
method __init__ (line 292) | def __init__(
class ModifiedEsmAttention (line 302) | class ModifiedEsmAttention(EsmAttention):
method __init__ (line 303) | def __init__(self, config, kdim=None, vdim=None):
FILE: src/byprot/models/dplm/modules/dplm_modeling_esm.py
class ModifiedEsmSelfAttention (line 20) | class ModifiedEsmSelfAttention(EsmSelfAttention):
method forward (line 21) | def forward(
class ModifiedEsmAttention (line 107) | class ModifiedEsmAttention(EsmAttention):
method __init__ (line 108) | def __init__(self, config):
class ModifiedEsmLayer (line 118) | class ModifiedEsmLayer(EsmLayer):
method __init__ (line 119) | def __init__(self, config):
class ModifiedEsmEncoder (line 139) | class ModifiedEsmEncoder(EsmEncoder):
method __init__ (line 140) | def __init__(self, config):
class ModifiedEsmModel (line 152) | class ModifiedEsmModel(EsmModel):
method __init__ (line 153) | def __init__(self, config, add_pooling_layer=True):
method forward (line 170) | def forward(
class EsmForDPLM (line 317) | class EsmForDPLM(EsmForMaskedLM):
method __init__ (line 318) | def __init__(self, config, dropout=0.1):
method forward (line 337) | def forward(
method forward_encoder (line 368) | def forward_encoder(self, batch, **kwargs):
method get_non_special_sym_mask (line 371) | def get_non_special_sym_mask(self, output_tokens, partial_masks=None):
method initialize_output_tokens (line 381) | def initialize_output_tokens(
method forward_decoder (line 397) | def forward_decoder(
method generate (line 443) | def generate(
function sample_from_categorical (line 504) | def sample_from_categorical(logits=None, temperature=1.0):
FILE: src/byprot/models/dplm/modules/gvp_transformer_encoder.py
class GVPTransformerEncoderWrapper (line 13) | class GVPTransformerEncoderWrapper(nn.Module):
method __init__ (line 14) | def __init__(self, freeze=True, output_logits=False, d_model=512):
method forward (line 26) | def forward(self, batch, output_logits=False, **kwargs):
FILE: src/byprot/models/dplm2/dplm2.py
function exists (line 20) | def exists(obj):
class SelfMixupConfig (line 25) | class SelfMixupConfig:
class TokenizerConfig (line 31) | class TokenizerConfig:
class StructTokenizerConfig (line 38) | class StructTokenizerConfig:
class DPLM2Config (line 44) | class DPLM2Config:
class MultimodalDiffusionProteinLanguageModel (line 70) | class MultimodalDiffusionProteinLanguageModel(nn.Module):
method __init__ (line 73) | def __init__(self, cfg, net=None):
method _update_cfg (line 96) | def _update_cfg(self, cfg):
method special_token_list (line 100) | def special_token_list(self):
method from_pretrained (line 119) | def from_pretrained(
method _prepare_special_token (line 168) | def _prepare_special_token(self):
method device (line 190) | def device(self):
method struct_tokenizer (line 198) | def struct_tokenizer(self):
method q_sample (line 206) | def q_sample(self, x_0, t, type_ids, maskable_mask):
method get_modality_type (line 220) | def get_modality_type(self, input_ids):
method forward (line 229) | def forward(self, input_ids, **kwargs):
method self_mixup (line 272) | def self_mixup(self, x_t, single_modality_index):
method get_mixup_xt (line 297) | def get_mixup_xt(self, input_ids, model_pred, non_special_sym_mask=None):
method construct_x_t (line 317) | def construct_x_t(self, struct_target, aatype_target):
method compute_loss (line 394) | def compute_loss(self, batch, weighting="linear"):
method forward_encoder (line 456) | def forward_encoder(self, input_tokens, **kwargs):
method initialize_output_tokens (line 459) | def initialize_output_tokens(
method forward_decoder (line 477) | def forward_decoder(
method get_non_special_symbol_mask (line 554) | def get_non_special_symbol_mask(self, output_tokens, partial_masks=None):
method _reparam_decoding (line 566) | def _reparam_decoding(
method generate (line 741) | def generate(
FILE: src/byprot/models/dplm2/dplm2_bit.py
class BitConfig (line 24) | class BitConfig:
class DPLM2BitConfig (line 32) | class DPLM2BitConfig(DPLM2Config):
class DPLM2Bit (line 38) | class DPLM2Bit(DPLM2):
method __init__ (line 41) | def __init__(self, cfg, net=None):
method _prepare_special_token (line 80) | def _prepare_special_token(self):
method forward (line 87) | def forward(self, input_ids, **kwargs):
method compute_loss (line 149) | def compute_loss(self, batch, weighting="linear"):
method self_mixup (line 222) | def self_mixup(self, x_t, single_modality_index, bsz, seq_len):
method forward_decoder (line 254) | def forward_decoder(
method sample_from_logits (line 321) | def sample_from_logits(
method generate (line 343) | def generate(
method prepare_for_struct_tokenizer (line 438) | def prepare_for_struct_tokenizer(self, decoder_out, non_special_sym_ma...
FILE: src/byprot/models/dplm2/modules/dplm2_bit_modeling_esm.py
class ModifiedRotaryEmbedding (line 28) | class ModifiedRotaryEmbedding(RotaryEmbedding):
method __init__ (line 35) | def __init__(self, dim: int):
method _update_cos_sin_tables (line 40) | def _update_cos_sin_tables(self, x, type_ids, seq_dimension=2):
method forward (line 64) | def forward(
class ModifiedEsmSelfAttention (line 88) | class ModifiedEsmSelfAttention(EsmSelfAttention):
method __init__ (line 89) | def __init__(self, config, position_embedding_type=None):
method forward (line 95) | def forward(
class ModifiedEsmAttention (line 220) | class ModifiedEsmAttention(EsmAttention):
method __init__ (line 221) | def __init__(self, config):
method forward (line 230) | def forward(
class ModifiedEsmLayer (line 259) | class ModifiedEsmLayer(EsmLayer):
method __init__ (line 260) | def __init__(self, config):
method forward (line 279) | def forward(
class ModifiedEsmEncoder (line 355) | class ModifiedEsmEncoder(EsmEncoder):
method __init__ (line 356) | def __init__(self, config):
method forward (line 367) | def forward(
class ModifiedEsmModel (line 467) | class ModifiedEsmModel(EsmModel):
method __init__ (line 468) | def __init__(self, config, add_pooling_layer=True):
method forward (line 485) | def forward(
class EsmForDPLM2Bit (line 638) | class EsmForDPLM2Bit(EsmForMaskedLM):
method __init__ (line 639) | def __init__(self, config, dropout=0.1, codebook_embed_dim=13):
method forward (line 672) | def forward(
FILE: src/byprot/models/dplm2/modules/dplm2_modeling_esm.py
class ModifiedRotaryEmbedding (line 28) | class ModifiedRotaryEmbedding(RotaryEmbedding):
method __init__ (line 35) | def __init__(self, dim: int):
method _update_cos_sin_tables (line 40) | def _update_cos_sin_tables(self, x, type_ids, seq_dimension=2):
method forward (line 64) | def forward(
class ModifiedEsmSelfAttention (line 88) | class ModifiedEsmSelfAttention(EsmSelfAttention):
method __init__ (line 89) | def __init__(self, config, position_embedding_type=None):
method forward (line 95) | def forward(
class ModifiedEsmAttention (line 220) | class ModifiedEsmAttention(EsmAttention):
method __init__ (line 221) | def __init__(self, config):
method forward (line 230) | def forward(
class ModifiedEsmLayer (line 259) | class ModifiedEsmLayer(EsmLayer):
method __init__ (line 260) | def __init__(self, config):
method forward (line 279) | def forward(
class ModifiedEsmEncoder (line 355) | class ModifiedEsmEncoder(EsmEncoder):
method __init__ (line 356) | def __init__(self, config):
method forward (line 367) | def forward(
class ModifiedEsmModel (line 467) | class ModifiedEsmModel(EsmModel):
method __init__ (line 468) | def __init__(self, config, add_pooling_layer=True):
method forward (line 485) | def forward(
class EsmForDPLM2 (line 638) | class EsmForDPLM2(EsmForMaskedLM):
method __init__ (line 639) | def __init__(self, config, dropout=0.1, vocab_size=None):
method forward (line 651) | def forward(
method forward_encoder (line 688) | def forward_encoder(self, batch, **kwargs):
method get_non_special_sym_mask (line 691) | def get_non_special_sym_mask(self, output_tokens, partial_masks=None):
method _get_resized_embeddings (line 701) | def _get_resized_embeddings(
FILE: src/byprot/models/structok/modules/ema.py
class LitEma (line 11) | class LitEma(nn.Module):
method __init__ (line 12) | def __init__(self, model, decay=0.9999, use_num_upates=True):
method forward (line 35) | def forward(self, model):
method copy_to (line 62) | def copy_to(self, model):
method store (line 73) | def store(self, parameters):
method restore (line 82) | def restore(self, parameters):
FILE: src/byprot/models/structok/modules/folding_utils/categorical_mixture.py
class CategoricalMixture (line 12) | class CategoricalMixture:
method __init__ (line 13) | def __init__(self, param, bins=50, start=0, end=1):
method log_prob (line 25) | def log_prob(self, true):
method mean (line 47) | def mean(self):
function categorical_lddt (line 51) | def categorical_lddt(logits, bins=50):
FILE: src/byprot/models/structok/modules/folding_utils/decoder.py
class ESMFoldConfig (line 39) | class ESMFoldConfig:
class ESMFoldStructureDecoder (line 63) | class ESMFoldStructureDecoder(nn.Module):
method __init__ (line 64) | def __init__(self, esmfold_config=None, **kwargs):
method _af2_to_esm (line 130) | def _af2_to_esm(d: Alphabet):
method _af2_idx_to_esm_idx (line 137) | def _af2_idx_to_esm_idx(self, aa, mask):
method _esm_idx_to_af2_idx (line 141) | def _esm_idx_to_af2_idx(self, esmaa, mask):
method _compute_language_model_representations (line 144) | def _compute_language_model_representations(
method _compute_input_representations (line 176) | def _compute_input_representations(self, emb_s, emb_z, esmaa):
method _mask_inputs_to_esm (line 180) | def _mask_inputs_to_esm(self, esmaa, pattern):
method forward (line 185) | def forward(
method infer (line 337) | def infer(
method output_to_pdb (line 403) | def output_to_pdb(self, output: T.Dict) -> T.List[str]:
method infer_pdbs (line 408) | def infer_pdbs(self, seqs: T.List[str], *args, **kwargs) -> T.List[str]:
method infer_pdb (line 414) | def infer_pdb(self, sequence: str, *args, **kwargs) -> str:
method set_chunk_size (line 419) | def set_chunk_size(self, chunk_size: T.Optional[int]):
method device (line 428) | def device(self):
FILE: src/byprot/models/structok/modules/folding_utils/esmfold.py
class ESMFoldConfig (line 28) | class ESMFoldConfig:
class ESMFold (line 49) | class ESMFold(nn.Module):
method __init__ (line 50) | def __init__(self, esmfold_config=None, **kwargs):
method _af2_to_esm (line 106) | def _af2_to_esm(d: Alphabet):
method _af2_idx_to_esm_idx (line 113) | def _af2_idx_to_esm_idx(self, aa, mask):
method _compute_language_model_representations (line 117) | def _compute_language_model_representations(self, esmaa: torch.Tensor)...
method _mask_inputs_to_esm (line 142) | def _mask_inputs_to_esm(self, esmaa, pattern):
method forward (line 147) | def forward(
method infer (line 274) | def infer(
method output_to_pdb (line 332) | def output_to_pdb(self, output: T.Dict) -> T.List[str]:
method infer_pdbs (line 336) | def infer_pdbs(self, seqs: T.List[str], *args, **kwargs) -> T.List[str]:
method infer_pdb (line 341) | def infer_pdb(self, sequence: str, *args, **kwargs) -> str:
method set_chunk_size (line 345) | def set_chunk_size(self, chunk_size: T.Optional[int]):
method device (line 354) | def device(self):
FILE: src/byprot/models/structok/modules/folding_utils/misc.py
function encode_sequence (line 30) | def encode_sequence(
function batch_encode_sequences (line 73) | def batch_encode_sequences(
function output_to_pdb (line 111) | def output_to_pdb(output: T.Dict) -> T.List[str]:
function collate_dense_tensors (line 159) | def collate_dense_tensors(
class Attention (line 189) | class Attention(nn.Module):
method __init__ (line 190) | def __init__(self, embed_dim, num_heads, head_width, gated=False):
method forward (line 210) | def forward(self, x, mask=None, bias=None, indices=None):
class Dropout (line 254) | class Dropout(nn.Module):
method __init__ (line 258) | def __init__(self, r: float, batch_dim: T.Union[int, T.List[int]]):
method forward (line 267) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class SequenceToPair (line 275) | class SequenceToPair(nn.Module):
method __init__ (line 276) | def __init__(self, sequence_state_dim, inner_dim, pairwise_state_dim):
method forward (line 286) | def forward(self, sequence_state):
class PairToSequence (line 313) | class PairToSequence(nn.Module):
method __init__ (line 314) | def __init__(self, pairwise_state_dim, num_heads):
method forward (line 320) | def forward(self, pairwise_state):
class ResidueMLP (line 334) | class ResidueMLP(nn.Module):
method __init__ (line 335) | def __init__(self, embed_dim, inner_dim, norm=nn.LayerNorm, dropout=0):
method forward (line 346) | def forward(self, x):
FILE: src/byprot/models/structok/modules/folding_utils/pretrained.py
function _load_model (line 16) | def _load_model(model_name):
function esmfold_v0 (line 48) | def esmfold_v0():
function esmfold_v1 (line 58) | def esmfold_v1():
function esmfold_structure_module_only_8M (line 69) | def esmfold_structure_module_only_8M():
function esmfold_structure_module_only_8M_270K (line 79) | def esmfold_structure_module_only_8M_270K():
function esmfold_structure_module_only_35M (line 89) | def esmfold_structure_module_only_35M():
function esmfold_structure_module_only_35M_270K (line 99) | def esmfold_structure_module_only_35M_270K():
function esmfold_structure_module_only_150M (line 109) | def esmfold_structure_module_only_150M():
function esmfold_structure_module_only_150M_270K (line 119) | def esmfold_structure_module_only_150M_270K():
function esmfold_structure_module_only_650M (line 129) | def esmfold_structure_module_only_650M():
function esmfold_structure_module_only_650M_270K (line 139) | def esmfold_structure_module_only_650M_270K():
function esmfold_structure_module_only_3B (line 149) | def esmfold_structure_module_only_3B():
function esmfold_structure_module_only_3B_270K (line 159) | def esmfold_structure_module_only_3B_270K():
function esmfold_structure_module_only_15B (line 169) | def esmfold_structure_module_only_15B():
FILE: src/byprot/models/structok/modules/folding_utils/structure_module.py
class AngleResnetBlock (line 59) | class AngleResnetBlock(nn.Module):
method __init__ (line 60) | def __init__(self, c_hidden):
method forward (line 75) | def forward(self, a: torch.Tensor) -> torch.Tensor:
class AngleResnet (line 87) | class AngleResnet(nn.Module):
method __init__ (line 90) | def __init__(self, c_in, c_hidden, no_blocks, no_angles, epsilon):
method forward (line 124) | def forward(
class InvariantPointAttention (line 171) | class InvariantPointAttention(nn.Module):
method __init__ (line 174) | def __init__(
method forward (line 240) | def forward(
class BackboneUpdate (line 434) | class BackboneUpdate(nn.Module):
method __init__ (line 437) | def __init__(self, c_s):
method forward (line 449) | def forward(self, s: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
class StructureModuleTransitionLayer (line 462) | class StructureModuleTransitionLayer(nn.Module):
method __init__ (line 463) | def __init__(self, c):
method forward (line 474) | def forward(self, s):
class StructureModuleTransition (line 487) | class StructureModuleTransition(nn.Module):
method __init__ (line 488) | def __init__(self, c, num_layers, dropout_rate):
method forward (line 503) | def forward(self, s):
class StructureModule (line 513) | class StructureModule(nn.Module):
method __init__ (line 514) | def __init__(
method forward (line 666) | def forward(
method _init_residue_constants (line 801) | def _init_residue_constants(self, float_dtype, device):
method torsion_angles_to_frames (line 846) | def torsion_angles_to_frames(self, r, alpha, f):
method frames_and_literature_positions_to_atom14_pos (line 852) | def frames_and_literature_positions_to_atom14_pos(
FILE: src/byprot/models/structok/modules/folding_utils/tri_self_attn_block.py
class TriangularSelfAttentionBlock (line 29) | class TriangularSelfAttentionBlock(nn.Module):
method __init__ (line 30) | def __init__(
method forward (line 119) | def forward(
FILE: src/byprot/models/structok/modules/folding_utils/trunk.py
class StructureModuleConfig (line 24) | class StructureModuleConfig:
class FoldingTrunkConfig (line 44) | class FoldingTrunkConfig:
function get_axial_mask (line 63) | def get_axial_mask(mask):
class RelativePosition (line 83) | class RelativePosition(nn.Module):
method __init__ (line 84) | def __init__(self, bins, pairwise_state_dim):
method forward (line 92) | def forward(self, residue_index, mask=None):
class FoldingTrunk (line 118) | class FoldingTrunk(nn.Module):
method __init__ (line 119) | def __init__(self, **kwargs):
method set_chunk_size (line 181) | def set_chunk_size(self, chunk_size):
method forward (line 188) | def forward(
method distogram (line 296) | def distogram(coords, min_bin, max_bin, num_bins):
FILE: src/byprot/models/structok/modules/gvp_encoder.py
function exists (line 10) | def exists(x):
class GVPTransformerEncoderWrapper (line 14) | class GVPTransformerEncoderWrapper(nn.Module):
method __init__ (line 15) | def __init__(self, alphabet=None, freeze=True, return_logits=False):
method forward (line 31) | def forward(self, backb_positions, mask, padding_mask, **kwargs):
class GVPTransformerEncoderWrapper2 (line 53) | class GVPTransformerEncoderWrapper2(nn.Module):
method __init__ (line 54) | def __init__(self, alphabet=None, freeze=True, return_logits=False):
method forward (line 70) | def forward(self, backb_positions, mask, padding_mask, **kwargs):
FILE: src/byprot/models/structok/modules/lfq.py
function exists (line 40) | def exists(v):
function default (line 44) | def default(*args):
function pack_one (line 51) | def pack_one(t, pattern):
function unpack_one (line 55) | def unpack_one(t, ps, pattern):
function entropy (line 62) | def entropy(prob):
function mult_along_first_dims (line 69) | def mult_along_first_dims(x, y):
function masked_mean (line 79) | def masked_mean(x, m):
function entropy_loss (line 95) | def entropy_loss(
class LFQ (line 133) | class LFQ(Module):
method __init__ (line 134) | def __init__(
method dtype (line 202) | def dtype(self):
method indices_to_bits (line 205) | def indices_to_bits(self, x):
method get_codebook_entry (line 218) | def get_codebook_entry(self, x, shape=None):
method bits_to_indices (line 241) | def bits_to_indices(self, bits):
method decode (line 257) | def decode(self, x):
method forward (line 272) | def forward(
FILE: src/byprot/models/structok/modules/loss.py
function drmsd (line 53) | def drmsd(structure_1, structure_2, mask=None):
function compute_validation_metrics (line 76) | def compute_validation_metrics(batch, outputs, superimposition_metrics=F...
function softmax_cross_entropy (line 129) | def softmax_cross_entropy(logits, labels):
function sigmoid_cross_entropy (line 137) | def sigmoid_cross_entropy(logits, labels):
function torsion_angle_loss (line 150) | def torsion_angle_loss(
function compute_fape (line 174) | def compute_fape(
function backbone_loss (line 247) | def backbone_loss(
function sidechain_loss (line 305) | def sidechain_loss(
function fape_loss (line 355) | def fape_loss(
function supervised_chi_loss (line 382) | def supervised_chi_loss(
function compute_plddt (line 467) | def compute_plddt(logits: torch.Tensor) -> torch.Tensor:
function lddt (line 481) | def lddt(
function lddt_ca (line 537) | def lddt_ca(
function lddt_loss (line 560) | def lddt_loss(
function distogram_loss (line 612) | def distogram_loss(
function _calculate_bin_centers (line 660) | def _calculate_bin_centers(boundaries: torch.Tensor):
function _calculate_expected_aligned_error (line 669) | def _calculate_expected_aligned_error(
function compute_predicted_aligned_error (line 680) | def compute_predicted_aligned_error(
function compute_tm (line 720) | def compute_tm(
function tm_loss (line 754) | def tm_loss(
function between_residue_bond_loss (line 811) | def between_residue_bond_loss(
function between_residue_clash_loss (line 976) | def between_residue_clash_loss(
function within_residue_violations (line 1123) | def within_residue_violations(
function find_structural_violations (line 1210) | def find_structural_violations(
function find_structural_violations_np (line 1327) | def find_structural_violations_np(
function extreme_ca_ca_distance_violations (line 1344) | def extreme_ca_ca_distance_violations(
function compute_violation_metrics (line 1381) | def compute_violation_metrics(
function compute_violation_metrics_np (line 1424) | def compute_violation_metrics_np(
function violation_loss (line 1440) | def violation_loss(
function compute_renamed_ground_truth (line 1466) | def compute_renamed_ground_truth(
function experimentally_resolved_loss (line 1573) | def experimentally_resolved_loss(
function masked_msa_loss (line 1599) | def masked_msa_loss(logits, true_msa, bert_mask, eps=1e-8, **kwargs):
function lm_loss (line 1631) | def lm_loss(logits, aatype, mask=None, eps=1e-8, **kwargs):
function backbone_atom_loss (line 1662) | def backbone_atom_loss(
function measure_perplexity (line 1690) | def measure_perplexity(predicted_indices, n_embed):
class StructureVQLoss (line 1702) | class StructureVQLoss(nn.Module):
method __init__ (line 1703) | def __init__(self, config):
method forward (line 1711) | def forward(
class AlphaFoldLoss (line 1770) | class AlphaFoldLoss(nn.Module):
method __init__ (line 1773) | def __init__(self, config=None):
method forward (line 1777) | def forward(self, out, batch, _return_breakdown=False):
FILE: src/byprot/models/structok/modules/nn.py
class TransformerEncoder (line 15) | class TransformerEncoder(nn.Module):
method __init__ (line 16) | def __init__(self, embed_dim, attnetion_heads, num_layers):
method forward (line 37) | def forward(
FILE: src/byprot/models/structok/modules/vqvae.py
class VectorQuantizer (line 20) | class VectorQuantizer(nn.Module):
method __init__ (line 36) | def __init__(self, n_e, e_dim, beta):
method forward (line 45) | def forward(self, z):
method get_codebook_entry (line 107) | def get_codebook_entry(self, indices, shape):
class GumbelQuantize (line 125) | class GumbelQuantize(nn.Module):
method __init__ (line 133) | def __init__(
method remap_to_used (line 176) | def remap_to_used(self, inds):
method unmap_to_all (line 192) | def unmap_to_all(self, inds):
method forward (line 202) | def forward(self, z, temp=None, return_logits=False):
method get_codebook_entry (line 240) | def get_codebook_entry(self, indices, shape):
class VectorQuantizer2 (line 255) | class VectorQuantizer2(nn.Module):
method __init__ (line 266) | def __init__(
method remap_to_used (line 306) | def remap_to_used(self, inds):
method unmap_to_all (line 322) | def unmap_to_all(self, inds):
method forward (line 332) | def forward(
method get_codebook_entry (line 410) | def get_codebook_entry(self, indices, shape=None):
class EmbeddingEMA (line 428) | class EmbeddingEMA(nn.Module):
method __init__ (line 429) | def __init__(self, num_tokens, codebook_dim, decay=0.99, eps=1e-5):
method forward (line 441) | def forward(self, embed_id):
method cluster_size_ema_update (line 444) | def cluster_size_ema_update(self, new_cluster_size):
method embed_avg_ema_update (line 449) | def embed_avg_ema_update(self, new_embed_avg):
method weight_update (line 454) | def weight_update(self, num_tokens):
class EMAVectorQuantizer (line 464) | class EMAVectorQuantizer(nn.Module):
method __init__ (line 465) | def __init__(
method remap_to_used (line 500) | def remap_to_used(self, inds):
method unmap_to_all (line 516) | def unmap_to_all(self, inds):
method forward (line 526) | def forward(self, z):
FILE: src/byprot/models/structok/structok_lfq.py
function exists (line 28) | def exists(o):
class VQModel (line 33) | class VQModel(nn.Module):
method __init__ (line 34) | def __init__(
method forward (line 113) | def forward(self, batch, return_pred_indices=True, decoder_kwargs={}):
method encode (line 138) | def encode(self, atom_positions, mask, seq_length=None, gvp_feat=None):
method decode (line 161) | def decode(self, quant, aatype, mask, decoder_kwargs={}):
method quantize_and_decode (line 178) | def quantize_and_decode(
method get_decoder_features (line 195) | def get_decoder_features(self, struct_tokens, res_mask, unk_mask):
method tokenize (line 214) | def tokenize(self, atom_positions, res_mask, seq_length=None):
method detokenize (line 225) | def detokenize(self, struct_tokens, res_mask=None, **kwargs):
method string_to_tensor (line 257) | def string_to_tensor(self, aatype_str, struct_token_str):
method init_data (line 265) | def init_data(self, raw_batch):
method output_to_pdb (line 268) | def output_to_pdb(self, decoder_out, output_dir, is_trajectory=False):
FILE: src/byprot/models/utils.py
class NetConfig (line 25) | class NetConfig:
class LoRAConfig (line 34) | class LoRAConfig:
function get_net_class (line 42) | def get_net_class(dplm_type):
function get_net (line 51) | def get_net(cfg):
function get_net_dplm2 (line 109) | def get_net_dplm2(cfg):
function get_net_dplm2_bit (line 209) | def get_net_dplm2_bit(cfg):
function topk_masking (line 257) | def topk_masking(scores, cutoff_len, stochastic=False, temp=1.0):
function topk_masking_prior (line 278) | def topk_masking_prior(
function mask_fill_811 (line 304) | def mask_fill_811(inputs, masked_indices, mask_id):
function sample_from_categorical (line 325) | def sample_from_categorical(logits=None, temperature=1.0):
function stochastic_sample_from_categorical (line 335) | def stochastic_sample_from_categorical(
function top_k_top_p_filtering (line 347) | def top_k_top_p_filtering(
function get_struct_tokenizer (line 387) | def get_struct_tokenizer(
FILE: src/byprot/modules/__init__.py
class _Criterion (line 12) | class _Criterion(nn.Module):
method __init__ (line 13) | def __init__(self, cfg) -> None:
method _build (line 22) | def _build(self):
method forward (line 31) | def forward(self, model_outs, targets):
FILE: src/byprot/modules/cross_entropy.py
function label_smoothed_nll_loss (line 10) | def label_smoothed_nll_loss(
class CrossEntropyLoss (line 37) | class CrossEntropyLoss(nn.CrossEntropyLoss):
method forward (line 38) | def forward(self, scores: Tensor, target: Tensor, mask=None) -> Tensor:
class Coord2SeqCrossEntropyLoss (line 88) | class Coord2SeqCrossEntropyLoss(nn.CrossEntropyLoss):
method forward (line 89) | def forward(
class RDMCrossEntropyLoss (line 155) | class RDMCrossEntropyLoss(nn.CrossEntropyLoss):
method forward (line 156) | def forward(
class StructAARDMCrossEntropyLoss (line 246) | class StructAARDMCrossEntropyLoss(nn.CrossEntropyLoss):
method forward (line 247) | def forward(
FILE: src/byprot/modules/metrics.py
function luost_rmsd (line 13) | def luost_rmsd(res_list1: list, res_list2: list):
function rmsd (line 43) | def rmsd(pred, target, mask=None):
function accuracy (line 54) | def accuracy(pred, target, mask=None, reduction="all"):
function accuracy_per_sample (line 62) | def accuracy_per_sample(pred, target, mask=None):
FILE: src/byprot/modules/protein_metrics.py
function calc_tm_score (line 59) | def calc_tm_score(pos_1, pos_2, seq_1, seq_2, mask):
function calc_perplexity (line 71) | def calc_perplexity(pred, labels, mask):
function calc_mdtraj_metrics (line 79) | def calc_mdtraj_metrics(pdb_path):
function rigid_transform_3D (line 96) | def rigid_transform_3D(A, B, verbose=False):
function calc_aligned_rmsd (line 148) | def calc_aligned_rmsd(pos_1, pos_2):
function protein_metrics (line 156) | def protein_metrics(
function ca_ca_distance (line 205) | def ca_ca_distance(ca_pos, tol=0.1):
function ca_ca_clashes (line 214) | def ca_ca_clashes(ca_pos, tol=1.5):
FILE: src/byprot/tasks/__init__.py
function on_prediction_mode (line 33) | def on_prediction_mode(pl_module: LightningModule, enable=True):
class TaskLitModule (line 77) | class TaskLitModule(LightningModule):
method __init__ (line 91) | def __init__(
method setup (line 124) | def setup(self, stage=None) -> None:
method lrate (line 129) | def lrate(self):
method stage (line 134) | def stage(self):
method log (line 137) | def log(
method step (line 154) | def step(self, batch):
method training_step (line 157) | def training_step(self, batch: Any, batch_idx: int, **kwargs):
method training_step_end (line 160) | def training_step_end(
method validation_step (line 171) | def validation_step(self, batch: Any, batch_idx: int):
method validation_step_end (line 174) | def validation_step_end(
method on_validation_epoch_end (line 183) | def on_validation_epoch_end(self):
method test_step (line 190) | def test_step(self, batch: Any, batch_idx: int):
method test_step_end (line 193) | def test_step_end(
method on_test_epoch_end (line 200) | def on_test_epoch_end(self):
method forward (line 204) | def forward(self, batch):
method predict_step (line 207) | def predict_step(
method predict_epoch_end (line 212) | def predict_epoch_end(self, results: List[Any], log_pref=None) -> None:
method configure_optimizers (line 216) | def configure_optimizers(self):
method on_train_epoch_end (line 241) | def on_train_epoch_end(self) -> None:
class AutoMetric (line 254) | class AutoMetric(nn.Module):
method __init__ (line 262) | def __init__(self) -> None:
method device (line 267) | def device(self):
method update (line 270) | def update(self, name, value, type="mean", **kwds):
method compute (line 280) | def compute(self, name):
method reset (line 283) | def reset(self, name):
function register_task (line 290) | def register_task(name):
FILE: src/byprot/tasks/lm/dplm.py
function new_arange (line 21) | def new_arange(x, *size):
class DPLMTrainingTask (line 33) | class DPLMTrainingTask(TaskLitModule):
method __init__ (line 44) | def __init__(
method setup (line 65) | def setup(self, stage=None) -> None:
method on_before_optimizer_step (line 76) | def on_before_optimizer_step(self, optimizer):
method build_model (line 83) | def build_model(self):
method build_criterion (line 89) | def build_criterion(self):
method build_torchmetric (line 95) | def build_torchmetric(self):
method step (line 101) | def step(self, batch):
method training_step (line 136) | def training_step(self, batch: Any, batch_idx: int):
method on_test_epoch_start (line 162) | def on_test_epoch_start(self) -> None:
method validation_step (line 165) | def validation_step(self, batch: Any, batch_idx: int):
method on_validation_epoch_end (line 177) | def on_validation_epoch_end(self):
FILE: src/byprot/tasks/lm/dplm2.py
function cal_index_acc (line 22) | def cal_index_acc(logits, target, loss_mask, bit_level=False):
class DPLM2TrainingTask (line 43) | class DPLM2TrainingTask(TaskLitModule):
method __init__ (line 52) | def __init__(
method setup (line 70) | def setup(self, stage=None) -> None:
method on_before_optimizer_step (line 81) | def on_before_optimizer_step(self, optimizer):
method build_model (line 88) | def build_model(self):
method build_criterion (line 94) | def build_criterion(self):
method build_torchmetric (line 100) | def build_torchmetric(self):
method load_from_ckpt (line 112) | def load_from_ckpt(self, ckpt_path, not_load=False):
method step (line 127) | def step(self, batch):
method training_step (line 185) | def training_step(self, batch: Any, batch_idx: int):
method validation_step (line 211) | def validation_step(self, batch: Any, batch_idx: int):
method on_validation_epoch_end (line 241) | def on_validation_epoch_end(self):
FILE: src/byprot/tasks/lm/dplm_invfold.py
function new_arange (line 26) | def new_arange(x, *size):
class ConditionalDPLMTrainingTask (line 38) | class ConditionalDPLMTrainingTask(TaskLitModule):
method __init__ (line 61) | def __init__(
method setup (line 83) | def setup(self, stage=None) -> None:
method on_test_epoch_start (line 94) | def on_test_epoch_start(self) -> None:
method on_predict_epoch_start (line 104) | def on_predict_epoch_start(self) -> None:
method load_from_ckpt (line 114) | def load_from_ckpt(self, ckpt_path, not_load=False):
method build_model (line 128) | def build_model(self):
method build_generator (line 134) | def build_generator(self):
method build_criterion (line 141) | def build_criterion(self):
method build_torchmetric (line 147) | def build_torchmetric(self):
method inject_noise (line 164) | def inject_noise(
method step (line 232) | def step(self, batch):
method training_step (line 295) | def training_step(self, batch: Any, batch_idx: int):
method validation_step (line 321) | def validation_step(self, batch: Any, batch_idx: int):
method eval_self_consistency (line 337) | def eval_self_consistency(self, pred_ids, positions, mask=None):
method on_validation_epoch_end (line 380) | def on_validation_epoch_end(self):
method forward (line 427) | def forward(self, batch, return_ids=False):
method predict_step (line 439) | def predict_step(
method on_predict_epoch_end (line 500) | def on_predict_epoch_end(self) -> None:
method save_prediction (line 567) | def save_prediction(self, results, saveto=None):
function decode (line 605) | def decode(batch_ids, alphabet, remove_special=False, replace_X=True):
FILE: src/byprot/tasks/lm/mlm.py
function new_arange (line 27) | def new_arange(x, *size):
class MLMTrainingTask (line 39) | class MLMTrainingTask(TaskLitModule):
method __init__ (line 52) | def __init__(
method setup (line 71) | def setup(self, stage=None) -> None:
method on_before_optimizer_step (line 82) | def on_before_optimizer_step(self, optimizer):
method build_model (line 89) | def build_model(self):
method build_criterion (line 93) | def build_criterion(self):
method build_torchmetric (line 99) | def build_torchmetric(self):
method inject_noise (line 109) | def inject_noise(self, tokens):
method step (line 157) | def step(self, batch):
method training_step (line 188) | def training_step(self, batch: Any, batch_idx: int):
method on_test_epoch_start (line 214) | def on_test_epoch_start(self) -> None:
method validation_step (line 217) | def validation_step(self, batch: Any, batch_idx: int):
method on_validation_epoch_end (line 231) | def on_validation_epoch_end(self):
method forward (line 277) | def forward(self, batch, return_ids=False):
method predict_step (line 296) | def predict_step(
method on_predict_epoch_end (line 313) | def on_predict_epoch_end(self) -> None:
FILE: src/byprot/tasks/struct_tokenizer/structok.py
function exists (line 36) | def exists(o):
class StrucTok (line 41) | class StrucTok(TaskLitModule):
method __init__ (line 49) | def __init__(
method setup (line 67) | def setup(self, stage=None) -> None:
method load_from_ckpt (line 84) | def load_from_ckpt(self, ckpt_path):
method build_model (line 111) | def build_model(self):
method build_criterion (line 117) | def build_criterion(self):
method build_torchmetric (line 122) | def build_torchmetric(self):
method ema_scope (line 128) | def ema_scope(self, context=None):
method step (line 142) | def step(self, batch):
method training_step (line 160) | def training_step(self, batch: Any, batch_idx: int, **kwargs):
method validation_step (line 189) | def validation_step(self, batch: Any, batch_idx: int):
method on_validation_epoch_end (line 201) | def on_validation_epoch_end(self):
method _log (line 226) | def _log(self, loss_breakdown, batch, outputs, train=True):
method _compute_validation_metrics (line 263) | def _compute_validation_metrics(
method configure_optimizers (line 327) | def configure_optimizers(self):
FILE: src/byprot/testing_pipeline.py
function test (line 25) | def test(config: DictConfig) -> None:
FILE: src/byprot/training_pipeline.py
function train (line 26) | def train(config: DictConfig) -> Optional[float]:
FILE: src/byprot/utils/__init__.py
function get_logger (line 38) | def get_logger(name=__name__) -> logging.Logger:
function load_from_experiment (line 62) | def load_from_experiment(experiment_save_dir, ckpt="best.ckpt"):
function extras (line 75) | def extras(config: DictConfig) -> None:
function print_config (line 103) | def print_config(
function log_hyperparameters (line 156) | def log_hyperparameters(
function finish (line 203) | def finish(
function common_pipeline (line 221) | def common_pipeline(config, training=False):
function resolve_ckpt_path (line 270) | def resolve_ckpt_path(ckpt_dir, ckpt_path):
function recursive_to (line 288) | def recursive_to(obj, device):
function recursive_apply (line 307) | def recursive_apply(obj, fn):
function recursive_eval (line 320) | def recursive_eval(obj):
function import_modules (line 335) | def import_modules(models_dir, namespace, excludes=[]):
function get_git_revision_hash (line 359) | def get_git_revision_hash() -> str:
function seed_everything (line 370) | def seed_everything(seed, verbose=False) -> int:
function local_seed (line 399) | def local_seed(seed, enable=True):
FILE: src/byprot/utils/callbacks.py
function _package_available (line 29) | def _package_available(package_name: str) -> bool:
function _compare_version (line 43) | def _compare_version(
function float_fmt (line 88) | def float_fmt(float_value):
class BetterMetricsTextColumn (line 96) | class BetterMetricsTextColumn(MetricsTextColumn):
method render (line 99) | def render(self, task) -> Text:
class BetterRichProgressBar (line 123) | class BetterRichProgressBar(RichProgressBar):
method _init_progress (line 124) | def _init_progress(self, trainer):
class ValEveryNSteps (line 150) | class ValEveryNSteps(pl.Callback):
method __init__ (line 151) | def __init__(self, every_n_step):
method on_batch_end (line 154) | def on_batch_end(self, trainer, pl_module):
class CheckpointEveryNSteps (line 162) | class CheckpointEveryNSteps(pl.Callback):
method __init__ (line 166) | def __init__(
method on_batch_end (line 184) | def on_batch_end(self, trainer: pl.Trainer, _):
class ModelCheckpoint (line 201) | class ModelCheckpoint(callbacks.ModelCheckpoint):
method _format_checkpoint_name (line 206) | def _format_checkpoint_name(
method on_train_start (line 221) | def on_train_start(
method _update_best_and_save (line 227) | def _update_best_and_save(
method _save_last_checkpoint (line 290) | def _save_last_checkpoint(
class TrackNorms (line 307) | class TrackNorms(pl.Callback):
method on_after_training_step (line 311) | def on_after_training_step(
method on_after_backward (line 333) | def on_after_backward(
FILE: src/byprot/utils/config.py
function get_logger (line 14) | def get_logger(name=__name__) -> logging.Logger:
function make_config (line 38) | def make_config(**kwargs):
function compose_config (line 42) | def compose_config(**kwds):
function merge_config (line 46) | def merge_config(default_cfg, override_cfg):
function load_yaml_config (line 52) | def load_yaml_config(fpath: str) -> OmegaConf:
function parse_cli_override_args (line 58) | def parse_cli_override_args():
function resolve_experiment_config (line 72) | def resolve_experiment_config(config: DictConfig):
function _convert_target_to_string (line 102) | def _convert_target_to_string(t: Any) -> Any:
function get_obj_from_str (line 109) | def get_obj_from_str(string, reload=False):
function instantiate_from_config (line 117) | def instantiate_from_config(cfg: OmegaConf, group=None, **override_kwargs):
function instantiate_from_config2 (line 143) | def instantiate_from_config2(config):
FILE: src/byprot/utils/io.py
function filter_backbone2 (line 28) | def filter_backbone2(array):
function load_structure (line 52) | def load_structure(fpath, chain=None):
function extract_coords_from_structure (line 88) | def extract_coords_from_structure(
function load_coords (line 109) | def load_coords(fpath, chain, atoms=["N", "CA", "C", "O"]):
function get_atom_coords_residuewise (line 123) | def get_atom_coords_residuewise(
function save_pdb (line 141) | def save_pdb(path, coords, seq):
FILE: src/byprot/utils/logger.py
class ByProtWandbLogger (line 32) | class ByProtWandbLogger(WandbLogger):
method __init__ (line 33) | def __init__(
FILE: src/byprot/utils/lr_scheduler.py
function get_scheduler (line 10) | def get_scheduler(cfg, optimizer):
class BlackHole (line 65) | class BlackHole(object):
method __setattr__ (line 66) | def __setattr__(self, name, value):
method __call__ (line 69) | def __call__(self, *args, **kwargs):
method __getattr__ (line 72) | def __getattr__(self, name):
function inverse_sqrt_lr_schedule (line 76) | def inverse_sqrt_lr_schedule(
class InverseSqrtLRScheduler (line 87) | class InverseSqrtLRScheduler(LambdaLR):
method __init__ (line 88) | def __init__(
function noam_lr_schedule (line 116) | def noam_lr_schedule(step, warmup_steps, factor, model_size):
class NoamScheduler (line 125) | class NoamScheduler(LambdaLR):
method __init__ (line 126) | def __init__(
function polynomial_lr_schedule (line 145) | def polynomial_lr_schedule(
class PolyNomialLRScheduler (line 161) | class PolyNomialLRScheduler(LambdaLR):
method __init__ (line 162) | def __init__(
FILE: src/byprot/utils/optim.py
function get_optimizer (line 15) | def get_optimizer(cfg, params):
class AdamW (line 52) | class AdamW(torch.optim.AdamW):
method step (line 54) | def step(self, closure=None):
FILE: src/byprot/utils/protein/all_atom.py
function to_atom37 (line 44) | def to_atom37(trans, rots):
function torsion_angles_to_frames (line 53) | def torsion_angles_to_frames(
function prot_to_torsion_angles (line 129) | def prot_to_torsion_angles(aatype, atom37, atom37_mask):
function frames_to_atom14_pos (line 144) | def frames_to_atom14_pos(
function compute_backbone (line 180) | def compute_backbone(bb_rigids, psi_torsions):
function calculate_neighbor_angles (line 205) | def calculate_neighbor_angles(R_ac, R_ab):
function vector_projection (line 230) | def vector_projection(R_ab, P_n):
function transrot_to_atom37 (line 250) | def transrot_to_atom37(transrot_traj, res_mask):
function atom37_from_trans_rot (line 270) | def atom37_from_trans_rot(trans, rots, res_mask):
function process_trans_rot_traj (line 284) | def process_trans_rot_traj(trans_traj, rots_traj, res_mask):
function adjust_oxygen_pos (line 294) | def adjust_oxygen_pos(
FILE: src/byprot/utils/protein/evaluator_dplm2.py
function load_from_pdb (line 53) | def load_from_pdb(pdb_path, process_chain=PdbDataset.process_chain):
function load_pdb_by_name (line 60) | def load_pdb_by_name(pdb_name, metadata_df):
class EvalRunner (line 70) | class EvalRunner:
method __init__ (line 71) | def __init__(self, cfg: DictConfig):
method load_metadata (line 122) | def load_metadata(self, cfg):
method device_id (line 136) | def device_id(self):
method device (line 142) | def device(self):
method folding_model (line 148) | def folding_model(self):
method struct_tokenizer (line 156) | def struct_tokenizer(self):
method inference_dir (line 166) | def inference_dir(self):
method setup_inference_dir (line 180) | def setup_inference_dir(self, ckpt_path):
method run_detokenize_from_fasta (line 193) | def run_detokenize_from_fasta(self, fasta_path):
method get_pdb_from_struct_fasta (line 252) | def get_pdb_from_struct_fasta(self, struct_fasta_path):
method write_trajectory (line 263) | def write_trajectory(self, pdb_folder):
method _run_struct_tokenizer (line 288) | def _run_struct_tokenizer(self, batch, output_dir, is_trajectory=False):
method run_tokenize (line 307) | def run_tokenize(self, pdb_folder, output_dir):
method evaluate_reconstruction (line 364) | def evaluate_reconstruction(self, pdb_folder, inplace_save=False):
method evaluate_unconditional (line 419) | def evaluate_unconditional(self, pdb_folder, inplace_save=False):
method evaluate_forward_folding (line 448) | def evaluate_forward_folding(self, pdb_folder, inplace_save=False):
method evaluate_inverse_folding (line 489) | def evaluate_inverse_folding(self, fasta_path, inplace_save=False):
method run_evaluation (line 555) | def run_evaluation(self, batch, eval_dir):
method run_pmpnn (line 806) | def run_pmpnn(
method compute_sample_metrics (line 830) | def compute_sample_metrics(
method compute_unconditional_metrics (line 1016) | def compute_unconditional_metrics(self, output_dir):
method compute_reconstruction_metrics (line 1051) | def compute_reconstruction_metrics(self, output_dir):
method compute_forward_folding_metrics (line 1074) | def compute_forward_folding_metrics(self, output_dir):
method compute_inverse_folding_metrics (line 1095) | def compute_inverse_folding_metrics(self, output_dir):
function run (line 1123) | def run(cfg: DictConfig) -> None:
FILE: src/byprot/utils/protein/folding_model.py
class FoldingModel (line 25) | class FoldingModel:
method __init__ (line 26) | def __init__(self, cfg, device_id=None):
method device_id (line 34) | def device_id(self):
method device (line 40) | def device(self):
method fold_fasta (line 45) | def fold_fasta(self, fasta_path, output_dir):
method _esmf_model (line 57) | def _esmf_model(self, fasta_path, output_dir):
method _af2_model (line 86) | def _af2_model(self, fasta_path, output_dir):
method run_pmpnn (line 141) | def run_pmpnn(self, input_dir, output_path):
FILE: src/byprot/utils/protein/residue_constants.py
function load_stereo_chemical_props (line 438) | def load_stereo_chemical_props() -> Tuple[
function sequence_to_onehot (line 887) | def sequence_to_onehot(
function _make_standard_atom_mask (line 1040) | def _make_standard_atom_mask() -> np.ndarray:
function chi_angle_atom (line 1058) | def chi_angle_atom(atom_index: int) -> np.ndarray:
function _make_rigid_transformation_4x4 (line 1105) | def _make_rigid_transformation_4x4(ex, ey, translation):
function _make_rigid_group_constants (line 1136) | def _make_rigid_group_constants():
function make_atom14_dists_bounds (line 1221) | def make_atom14_dists_bounds(
FILE: src/byprot/utils/protein/tokenize_pdb.py
function load_from_pdb (line 29) | def load_from_pdb(pdb_path, process_chain=PdbDataset.process_chain):
function run_tokenize (line 37) | def run_tokenize(struct_tokenizer, input_pdb_folder, output_dir):
function main (line 89) | def main():
FILE: src/byprot/utils/protein/utils.py
class LengthDataset (line 45) | class LengthDataset(torch.utils.data.Dataset):
method __init__ (line 46) | def __init__(self, samples_cfg):
method __len__ (line 70) | def __len__(self):
method __getitem__ (line 73) | def __getitem__(self, idx):
function dataset_creation (line 87) | def dataset_creation(dataset_class, cfg, task):
function get_available_device (line 101) | def get_available_device(num_device):
function run_easy_cluster (line 105) | def run_easy_cluster(designable_dir, output_dir):
function get_all_top_samples (line 138) | def get_all_top_samples(output_dir, csv_fname="*/*/top_sample.csv"):
function calculate_diversity (line 149) | def calculate_diversity(
function add_diversity_metrics (line 171) | def add_diversity_metrics(designable_dir, designable_csv, designable_csv...
function calculate_pmpnn_consistency (line 178) | def calculate_pmpnn_consistency(
function calculate_pmpnn_designability (line 211) | def calculate_pmpnn_designability(
function get_pylogger (line 250) | def get_pylogger(name=__name__) -> logging.Logger:
function get_ddp_info (line 272) | def get_ddp_info():
function flatten_dict (line 285) | def flatten_dict(raw_dict):
function save_traj (line 296) | def save_traj(
function get_dataset_cfg (line 384) | def get_dataset_cfg(cfg):
function create_full_prot (line 399) | def create_full_prot(
function write_prot_to_pdb (line 425) | def write_prot_to_pdb(
function calc_distogram (line 490) | def calc_distogram(pos, min_bin, max_bin, num_bins):
function get_index_embedding (line 500) | def get_index_embedding(indices, embed_size, max_len=2056):
function get_time_embedding (line 522) | def get_time_embedding(timesteps, embedding_dim, max_positions=2000):
function sinusoidal_encoding (line 540) | def sinusoidal_encoding(v, N, D):
function distance (line 572) | def distance(p, eps=1e-10):
function dist_from_ca (line 577) | def dist_from_ca(trans):
function calc_rbf (line 593) | def calc_rbf(ca_dists, num_rbf, D_min=1e-3, D_max=22.0):
function t_stratified_loss (line 602) | def t_stratified_loss(batch_t, batch_loss, num_bins=4, loss_name=None):
function process_folded_outputs (line 624) | def process_folded_outputs(sample_path, folded_output, true_bb_pos=None):
function extract_clusters_from_maxcluster_out (line 758) | def extract_clusters_from_maxcluster_out(file_path):
function calc_mdtraj_metrics (line 797) | def calc_mdtraj_metrics(pdb_path):
function calc_aatype_metrics (line 822) | def calc_aatype_metrics(generated_aatypes):
function calc_ca_ca_metrics (line 881) | def calc_ca_ca_metrics(ca_pos, bond_tol=0.1, clash_tol=1.0):
function calc_tm_score (line 900) | def calc_tm_score(pos_1, pos_2, seq_1, seq_2):
FILE: src/byprot/utils/registry.py
function get_module (line 14) | def get_module(group_name, module_name):
function get_registered_modules (line 24) | def get_registered_modules(group_name):
FILE: src/byprot/utils/scaffold_utils.py
function get_intervals (line 192) | def get_intervals(list, single_res_domain=False):
function get_motif_dplm (line 212) | def get_motif_dplm(pdb, ori_pdb):
function get_motif_dplm2 (line 247) | def get_motif_dplm2(pdb_name, ori_pdb_name, motif_seq, mask_token, space...
function prepare_data (line 278) | def prepare_data(pdb_path, alphabet, collator, num_seqs, device):
function get_initial_dplm (line 300) | def get_initial_dplm(args, tokenizer, pdb, ori_pdb, device):
function get_initial_dplm2 (line 395) | def get_initial_dplm2(args, aa_seq, struct_seq, tokenizer, pdb, ori_pdb,...
function create_init_seq (line 410) | def create_init_seq(pdb, ori_pdb, aa_seq, struct_seq, tokenizer, args):
function collate (line 511) | def collate(tokenizer, init_aa_seq, init_struct_seq, args, device):
function create_idxs_list (line 567) | def create_idxs_list(pdb, tokenizer, batch, args):
function create_batches (line 599) | def create_batches(batch, args):
FILE: src/byprot/utils/strategies.py
class CPUInitFSDPStrategy (line 28) | class CPUInitFSDPStrategy(FSDPStrategy):
method _setup_model (line 30) | def _setup_model(self, model: Module) -> Module:
FILE: test.py
function main (line 25) | def main(config: DictConfig):
FILE: train.py
function main (line 51) | def main(config: DictConfig):
FILE: vendor/openfold/openfold/config.py
function set_inf (line 6) | def set_inf(c, inf):
function enforce_config_constraints (line 14) | def enforce_config_constraints(config):
function model_config (line 51) | def model_config(
FILE: vendor/openfold/openfold/data/data_modules.py
class OpenFoldSingleDataset (line 23) | class OpenFoldSingleDataset(torch.utils.data.Dataset):
method __init__ (line 24) | def __init__(self,
method _parse_mmcif (line 170) | def _parse_mmcif(self, path, file_id, chain_id, alignment_dir, alignme...
method chain_id_to_idx (line 195) | def chain_id_to_idx(self, chain_id):
method idx_to_chain_id (line 198) | def idx_to_chain_id(self, idx):
method __getitem__ (line 201) | def __getitem__(self, idx):
method __len__ (line 283) | def __len__(self):
function deterministic_train_filter (line 287) | def deterministic_train_filter(
function get_stochastic_train_filter_prob (line 310) | def get_stochastic_train_filter_prob(
class OpenFoldDataset (line 331) | class OpenFoldDataset(torch.utils.data.Dataset):
method __init__ (line 338) | def __init__(self,
method __getitem__ (line 401) | def __getitem__(self, idx):
method __len__ (line 405) | def __len__(self):
method reroll (line 408) | def reroll(self):
class OpenFoldBatchCollator (line 423) | class OpenFoldBatchCollator:
method __call__ (line 424) | def __call__(self, prots):
class OpenFoldDataLoader (line 429) | class OpenFoldDataLoader(torch.utils.data.DataLoader):
method __init__ (line 430) | def __init__(self, *args, config, stage="train", generator=None, **kwa...
method _prep_batch_properties_probs (line 437) | def _prep_batch_properties_probs(self):
method _add_batch_properties (line 467) | def _add_batch_properties(self, batch):
method __iter__ (line 503) | def __iter__(self):
class OpenFoldDataModule (line 513) | class OpenFoldDataModule(pl.LightningDataModule):
method __init__ (line 514) | def __init__(self,
method setup (line 605) | def setup(self):
method _gen_dataloader (line 688) | def _gen_dataloader(self, stage):
method train_dataloader (line 720) | def train_dataloader(self):
method val_dataloader (line 723) | def val_dataloader(self):
method predict_dataloader (line 728) | def predict_dataloader(self):
class DummyDataset (line 732) | class DummyDataset(torch.utils.data.Dataset):
method __init__ (line 733) | def __init__(self, batch_path):
method __getitem__ (line 737) | def __getitem__(self, idx):
method __len__ (line 740) | def __len__(self):
class DummyDataLoader (line 744) | class DummyDataLoader(pl.LightningDataModule):
method __init__ (line 745) | def __init__(self, batch_path):
method train_dataloader (line 749) | def train_dataloader(self):
FILE: vendor/openfold/openfold/data/data_pipeline.py
function empty_template_feats (line 33) | def empty_template_feats(n_res) -> FeatureDict:
function make_template_features (line 43) | def make_template_features(
function unify_template_features (line 70) | def unify_template_features(
function make_sequence_features (line 115) | def make_sequence_features(
function make_mmcif_features (line 137) | def make_mmcif_features(
function _aatype_to_str_sequence (line 173) | def _aatype_to_str_sequence(aatype):
function make_protein_features (line 180) | def make_protein_features(
function make_pdb_features (line 210) | def make_pdb_features(
function make_msa_features (line 228) | def make_msa_features(
function make_dummy_msa_feats (line 265) | def make_dummy_msa_feats(input_sequence):
function make_sequence_features_with_custom_template (line 276) | def make_sequence_features_with_custom_template(
class AlignmentRunner (line 311) | class AlignmentRunner:
method __init__ (line 313) | def __init__(
method run (line 451) | def run(
class DataPipeline (line 506) | class DataPipeline:
method __init__ (line 508) | def __init__(
method _parse_msa_data (line 514) | def _parse_msa_data(
method _parse_template_hits (line 569) | def _parse_template_hits(
method _get_msas (line 602) | def _get_msas(self,
method _process_msa_feats (line 627) | def _process_msa_feats(
method _process_seqemb_features (line 644) | def _process_seqemb_features(self,
method process_fasta (line 659) | def process_fasta(
method process_mmcif (line 706) | def process_mmcif(
method process_pdb (line 748) | def process_pdb(
method process_core (line 800) | def process_core(
method process_multiseq_fasta (line 835) | def process_multiseq_fasta(self,
FILE: vendor/openfold/openfold/data/data_transforms.py
function cast_to_64bit_ints (line 43) | def cast_to_64bit_ints(protein):
function make_one_hot (line 52) | def make_one_hot(x, num_classes):
function make_seq_mask (line 58) | def make_seq_mask(protein):
function make_template_mask (line 65) | def make_template_mask(protein):
function curry1 (line 72) | def curry1(f):
function make_all_atom_aatype (line 81) | def make_all_atom_aatype(protein):
function fix_templates_aatype (line 86) | def fix_templates_aatype(protein):
function correct_msa_restypes (line 105) | def correct_msa_restypes(protein):
function squeeze_features (line 130) | def squeeze_features(protein):
function randomly_replace_msa_with_unknown (line 162) | def randomly_replace_msa_with_unknown(protein, replace_proportion):
function sample_msa (line 184) | def sample_msa(protein, max_seq, keep_extra, seed=None):
function add_distillation_flag (line 215) | def add_distillation_flag(protein, distillation):
function sample_msa_distillation (line 220) | def sample_msa_distillation(protein, max_seq):
function crop_extra_msa (line 227) | def crop_extra_msa(protein, max_extra_msa):
function delete_extra_msa (line 240) | def delete_extra_msa(protein):
function block_delete_msa (line 249) | def block_delete_msa(protein, config):
function nearest_neighbor_clusters (line 283) | def nearest_neighbor_clusters(protein, gap_agreement_weight=0.0):
function unsorted_segment_sum (line 319) | def unsorted_segment_sum(data, segment_ids, num_segments):
function summarize_clusters (line 347) | def summarize_clusters(protein):
function make_msa_mask (line 372) | def make_msa_mask(protein):
function pseudo_beta_fn (line 381) | def pseudo_beta_fn(aatype, all_atom_positions, all_atom_mask):
function make_pseudo_beta (line 402) | def make_pseudo_beta(protein, prefix=""):
function add_constant_field (line 417) | def add_constant_field(protein, key, value):
function shaped_categorical (line 422) | def shaped_categorical(probs, epsilon=1e-10):
function make_hhblits_profile (line 432) | def make_hhblits_profile(protein):
function make_masked_msa (line 445) | def make_masked_msa(protein, config, replace_fraction):
function make_fixed_size (line 489) | def make_fixed_size(
function make_msa_feat (line 528) | def make_msa_feat(protein):
function select_feat (line 579) | def select_feat(protein, feature_list):
function crop_templates (line 584) | def crop_templates(protein, max_templates):
function make_atom14_masks (line 591) | def make_atom14_masks(protein):
function make_atom14_masks_np (line 665) | def make_atom14_masks_np(batch):
function make_atom14_positions (line 676) | def make_atom14_positions(protein):
function atom37_to_frames (line 779) | def atom37_to_frames(protein, eps=1e-8):
function get_chi_atom_indices (line 918) | def get_chi_atom_indices():
function atom37_to_torsion_angles (line 946) | def atom37_to_torsion_angles(
function get_backbone_frames (line 1114) | def get_backbone_frames(protein):
function get_chi_angles (line 1124) | def get_chi_angles(protein):
function random_crop_to_size (line 1135) | def random_crop_to_size(
FILE: vendor/openfold/openfold/data/errors.py
class Error (line 17) | class Error(Exception):
class MultipleChainsError (line 21) | class MultipleChainsError(Error):
FILE: vendor/openfold/openfold/data/feature_pipeline.py
function np_to_tensor_dict (line 30) | def np_to_tensor_dict(
function make_data_config (line 53) | def make_data_config(
function np_example_to_features (line 79) | def np_example_to_features(
class FeaturePipeline (line 121) | class FeaturePipeline:
method __init__ (line 122) | def __init__(
method process_features (line 128) | def process_features(
FILE: vendor/openfold/openfold/data/input_pipeline.py
function nonensembled_transform_fns (line 23) | def nonensembled_transform_fns(common_cfg, mode_cfg):
function ensembled_transform_fns (line 70) | def ensembled_transform_fns(common_cfg, mode_cfg, ensemble_seed):
function process_tensors_from_config (line 153) | def process_tensors_from_config(tensors, common_cfg, mode_cfg):
function compose (line 194) | def compose(x, fs):
function map_fn (line 200) | def map_fn(fun, x):
FILE: vendor/openfold/openfold/data/mmcif_parsing.py
class Monomer (line 42) | class Monomer:
class AtomSite (line 50) | class AtomSite:
class ResiduePosition (line 63) | class ResiduePosition:
class ResidueAtPosition (line 70) | class ResidueAtPosition:
class MmcifObject (line 78) | class MmcifObject:
class ParsingResult (line 104) | class ParsingResult:
class ParseError (line 117) | class ParseError(Exception):
function mmcif_loop_to_list (line 121) | def mmcif_loop_to_list(
function mmcif_loop_to_dict (line 153) | def mmcif_loop_to_dict(
function parse (line 176) | def parse(
function _get_first_model (line 306) | def _get_first_model(structure: PdbStructure) -> PdbStructure:
function get_release_date (line 314) | def get_release_date(parsed_info: MmCIFDict) -> str:
function _get_header (line 320) | def _get_header(parsed_info: MmCIFDict) -> PdbHeader:
function _get_atom_site_list (line 356) | def _get_atom_site_list(parsed_info: MmCIFDict) -> Sequence[AtomSite]:
function _get_protein_chains (line 373) | def _get_protein_chains(
function _is_set (line 427) | def _is_set(data: str) -> bool:
function get_atom_coords (line 432) | def get_atom_coords(
FILE: vendor/openfold/openfold/data/parsers.py
class TemplateHit (line 28) | class TemplateHit:
function parse_fasta (line 41) | def parse_fasta(fasta_string: str) -> Tuple[Sequence[str], Sequence[str]]:
function parse_stockholm (line 72) | def parse_stockholm(
function parse_a3m (line 132) | def parse_a3m(a3m_string: str) -> Tuple[Sequence[str], DeletionMatrix]:
function _convert_sto_seq_to_a3m (line 166) | def _convert_sto_seq_to_a3m(
function convert_stockholm_to_a3m (line 176) | def convert_stockholm_to_a3m(
function _get_hhr_line_regex_groups (line 230) | def _get_hhr_line_regex_groups(
function _update_hhr_residue_indices_list (line 239) | def _update_hhr_residue_indices_list(
function _parse_hhr_hit (line 252) | def _parse_hhr_hit(detailed_lines: Sequence[str]) -> TemplateHit:
function parse_hhr (line 358) | def parse_hhr(hhr_string: str) -> Sequence[TemplateHit]:
function parse_e_values_from_tblout (line 378) | def parse_e_values_from_tblout(tblout: str) -> Dict[str, float]:
FILE: vendor/openfold/openfold/data/templates.py
class NoChainsError (line 35) | class NoChainsError(Error):
class SequenceNotInTemplateError (line 39) | class SequenceNotInTemplateError(Error):
class NoAtomDataInTemplateError (line 43) | class NoAtomDataInTemplateError(Error):
class TemplateAtomMaskAllZerosError (line 47) | class TemplateAtomMaskAllZerosError(Error):
class QueryToTemplateAlignError (line 51) | class QueryToTemplateAlignError(Error):
class CaDistanceError (line 55) | class CaDistanceError(Error):
class PrefilterError (line 60) | class PrefilterError(Exception):
class DateError (line 64) | class DateError(PrefilterError):
class PdbIdError (line 68) | class PdbIdError(PrefilterError):
class AlignRatioError (line 72) | class AlignRatioError(PrefilterError):
class DuplicateError (line 76) | class DuplicateError(PrefilterError):
class LengthError (line 80) | class LengthError(PrefilterError):
function _get_pdb_id_and_chain (line 94) | def _get_pdb_id_and_chain(hit: parsers.TemplateHit) -> Tuple[str, str]:
function _is_after_cutoff (line 104) | def _is_after_cutoff(
function _replace_obsolete_references (line 133) | def _replace_obsolete_references(obsolete_mapping) -> Mapping[str, str]:
function _parse_obsolete (line 149) | def _parse_obsolete(obsolete_file_path: str) -> Mapping[str, str]:
function generate_release_dates_cache (line 165) | def generate_release_dates_cache(mmcif_dir: str, out_path: str):
function _parse_release_dates (line 190) | def _parse_release_dates(path: str) -> Mapping[str, datetime.datetime]:
function _assess_hhsearch_hit (line 203) | def _assess_hhsearch_hit(
function _find_template_in_pdb (line 282) | def _find_template_in_pdb(
function _realign_pdb_template_to_query (line 356) | def _realign_pdb_template_to_query(
function _check_residue_distances (line 494) | def _check_residue_distances(
function _get_atom_positions (line 518) | def _get_atom_positions(
function _extract_template_features (line 537) | def _extract_template_features(
function _build_query_to_hit_index_mapping (line 697) | def _build_query_to_hit_index_mapping(
class PrefilterResult (line 757) | class PrefilterResult:
class SingleHitResult (line 763) | class SingleHitResult:
function _prefilter_hit (line 769) | def _prefilter_hit(
function _process_single_hit (line 811) | def _process_single_hit(
function get_custom_template_features (line 932) | def get_custom_template_features(
class TemplateSearchResult (line 983) | class TemplateSearchResult:
class TemplateHitFeaturizer (line 989) | class TemplateHitFeaturizer:
method __init__ (line 991) | def __init__(
method get_templates (line 1062) | def get_templates(
FILE: vendor/openfold/openfold/data/tools/hhblits.py
class HHBlits (line 30) | class HHBlits:
method __init__ (line 33) | def __init__(
method query (line 102) | def query(self, input_fasta_path: str) -> Mapping[str, Any]:
FILE: vendor/openfold/openfold/data/tools/hhsearch.py
class HHSearch (line 26) | class HHSearch:
method __init__ (line 29) | def __init__(
method query (line 65) | def query(self, a3m: str) -> str:
FILE: vendor/openfold/openfold/data/tools/jackhmmer.py
class Jackhmmer (line 29) | class Jackhmmer:
method __init__ (line 32) | def __init__(
method _query_chunk (line 95) | def _query_chunk(
method query (line 183) | def query(self, input_fasta_path: str) -> Sequence[Mapping[str, Any]]:
FILE: vendor/openfold/openfold/data/tools/kalign.py
function _to_a3m (line 26) | def _to_a3m(sequences: Sequence[str]) -> str:
class Kalign (line 36) | class Kalign:
method __init__ (line 39) | def __init__(self, *, binary_path: str):
method align (line 50) | def align(self, sequences: Sequence[str]) -> str:
FILE: vendor/openfold/openfold/data/tools/utils.py
function tmpdir_manager (line 27) | def tmpdir_manager(base_dir: Optional[str] = None):
function timing (line 37) | def timing(msg: str):
function to_date (line 45) | def to_date(s: str):
FILE: vendor/openfold/openfold/model/dropout.py
class Dropout (line 22) | class Dropout(nn.Module):
method __init__ (line 30) | def __init__(self, r: float, batch_dim: Union[int, List[int]]):
method forward (line 46) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class DropoutRowwise (line 63) | class DropoutRowwise(Dropout):
class DropoutColumnwise (line 72) | class DropoutColumnwise(Dropout):
FILE: vendor/openfold/openfold/model/embedders.py
class InputEmbedder (line 24) | class InputEmbedder(nn.Module):
method __init__ (line 31) | def __init__(
method relpos (line 71) | def relpos(self, ri: torch.Tensor):
method forward (line 93) | def forward(
class PreembeddingEmbedder (line 142) | class PreembeddingEmbedder(nn.Module):
method __init__ (line 147) | def __init__(
method relpos (line 187) | def relpos(self, ri: torch.Tensor):
method forward (line 209) | def forward(
class RecyclingEmbedder (line 236) | class RecyclingEmbedder(nn.Module):
method __init__ (line 242) | def __init__(
method forward (line 278) | def forward(
class TemplateAngleEmbedder (line 338) | class TemplateAngleEmbedder(nn.Module):
method __init__ (line 345) | def __init__(
method forward (line 367) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class TemplatePairEmbedder (line 381) | class TemplatePairEmbedder(nn.Module):
method __init__ (line 388) | def __init__(
method forward (line 409) | def forward(
class ExtraMSAEmbedder (line 425) | class ExtraMSAEmbedder(nn.Module):
method __init__ (line 431) | def __init__(
method forward (line 451) | def forward(self, x: torch.Tensor) -> torch.Tensor:
FILE: vendor/openfold/openfold/model/evoformer.py
class MSATransition (line 45) | class MSATransition(nn.Module):
method __init__ (line 51) | def __init__(self, c_m, n):
method _transition (line 70) | def _transition(self, m, mask):
method _chunk (line 78) | def _chunk(self,
method forward (line 91) | def forward(
class EvoformerBlockCore (line 121) | class EvoformerBlockCore(nn.Module):
method __init__ (line 122) | def __init__(
method forward (line 179) | def forward(self,
class EvoformerBlock (line 313) | class EvoformerBlock(nn.Module):
method __init__ (line 314) | def __init__(self,
method forward (line 366) | def forward(self,
class ExtraMSABlock (line 438) | class ExtraMSABlock(nn.Module):
method __init__ (line 445) | def __init__(self,
method forward (line 497) | def forward(self,
class EvoformerStack (line 581) | class EvoformerStack(nn.Module):
method __init__ (line 588) | def __init__(
method _prep_blocks (line 684) | def _prep_blocks(self,
method _forward_offload (line 735) | def _forward_offload(self,
method forward (line 776) | def forward(self,
class ExtraMSAStack (line 839) | class ExtraMSAStack(nn.Module):
method __init__ (line 843) | def __init__(self,
method _prep_blocks (line 892) | def _prep_blocks(self,
method _forward_offload (line 941) | def _forward_offload(self,
method forward (line 976) | def forward(self,
FILE: vendor/openfold/openfold/model/heads.py
class AuxiliaryHeads (line 28) | class AuxiliaryHeads(nn.Module):
method __init__ (line 29) | def __init__(self, config):
method forward (line 55) | def forward(self, outputs):
class PerResidueLDDTCaPredictor (line 92) | class PerResidueLDDTCaPredictor(nn.Module):
method __init__ (line 93) | def __init__(self, no_bins, c_in, c_hidden):
method forward (line 108) | def forward(self, s):
class DistogramHead (line 119) | class DistogramHead(nn.Module):
method __init__ (line 126) | def __init__(self, c_z, no_bins, **kwargs):
method _forward (line 141) | def _forward(self, z): # [*, N, N, C_z]
method forward (line 154) | def forward(self, z):
class TMScoreHead (line 162) | class TMScoreHead(nn.Module):
method __init__ (line 167) | def __init__(self, c_z, no_bins, **kwargs):
method forward (line 182) | def forward(self, z):
class MaskedMSAHead (line 195) | class MaskedMSAHead(nn.Module):
method __init__ (line 200) | def __init__(self, c_m, c_out, **kwargs):
method forward (line 215) | def forward(self, m):
class ExperimentallyResolvedHead (line 228) | class ExperimentallyResolvedHead(nn.Module):
method __init__ (line 234) | def __init__(self, c_s, c_out, **kwargs):
method forward (line 249) | def forward(self, s):
FILE: vendor/openfold/openfold/model/model.py
class AlphaFold (line 56) | class AlphaFold(nn.Module):
method __init__ (line 63) | def __init__(self, config):
method embed_templates (line 124) | def embed_templates(self, batch, z, pair_mask, templ_dim, inplace_safe):
method iteration (line 224) | def iteration(self, feats, prevs, _recycle=True):
method forward (line 458) | def forward(self, batch):
FILE: vendor/openfold/openfold/model/msa.py
class MSAAttention (line 36) | class MSAAttention(nn.Module):
method __init__ (line 37) | def __init__(
method _chunk (line 90) | def _chunk(self,
method _prep_inputs (line 128) | def _prep_inputs(self,
method _chunked_msa_attn (line 170) | def _chunked_msa_attn(self,
method forward (line 218) | def forward(self,
class MSARowAttentionWithPairBias (line 290) | class MSARowAttentionWithPairBias(MSAAttention):
method __init__ (line 295) | def __init__(self, c_m, c_z, c_hidden, no_heads, inf=1e9):
class MSAColumnAttention (line 319) | class MSAColumnAttention(nn.Module):
method __init__ (line 327) | def __init__(self, c_m, c_hidden, no_heads, inf=1e9):
method forward (line 355) | def forward(self,
class MSAColumnGlobalAttention (line 394) | class MSAColumnGlobalAttention(nn.Module):
method __init__ (line 395) | def __init__(
method _chunk (line 417) | def _chunk(self,
method forward (line 439) | def forward(
FILE: vendor/openfold/openfold/model/outer_product_mean.py
class OuterProductMean (line 27) | class OuterProductMean(nn.Module):
method __init__ (line 32) | def __init__(self, c_m, c_z, c_hidden, eps=1e-3):
method _opm (line 54) | def _opm(self, a, b):
method _chunk (line 67) | def _chunk(self,
method _forward (line 97) | def _forward(self,
method forward (line 148) | def forward(self,
FILE: vendor/openfold/openfold/model/pair_transition.py
class PairTransition (line 24) | class PairTransition(nn.Module):
method __init__ (line 29) | def __init__(self, c_z, n):
method _transition (line 48) | def _transition(self, z, mask):
method _chunk (line 63) | def _chunk(self,
method forward (line 75) | def forward(self,
FILE: vendor/openfold/openfold/model/primitives.py
function _prod (line 49) | def _prod(nums):
function _calculate_fan (line 56) | def _calculate_fan(linear_weight_shape, fan="fan_in"):
function trunc_normal_init_ (line 71) | def trunc_normal_init_(weights, scale=1.0, fan="fan_in"):
function lecun_normal_init_ (line 85) | def lecun_normal_init_(weights):
function he_normal_init_ (line 89) | def he_normal_init_(weights):
function glorot_uniform_init_ (line 93) | def glorot_uniform_init_(weights):
function final_init_ (line 97) | def final_init_(weights):
function gating_init_ (line 102) | def gating_init_(weights):
function normal_init_ (line 107) | def normal_init_(weights):
function ipa_point_weights_init_ (line 111) | def ipa_point_weights_init_(weights):
class Linear (line 117) | class Linear(nn.Linear):
method __init__ (line 126) | def __init__(
class LayerNorm (line 185) | class LayerNorm(nn.Module):
method __init__ (line 186) | def __init__(self, c_in, eps=1e-5):
method forward (line 195) | def forward(self, x):
function softmax_no_cast (line 224) | def softmax_no_cast(t: torch.Tensor, dim: int = -1) -> torch.Tensor:
function _attention (line 245) | def _attention(query: torch.Tensor, key: torch.Tensor, value: torch.Tens...
function _attention_chunked_trainable (line 264) | def _attention_chunked_trainable(
class Attention (line 318) | class Attention(nn.Module):
method __init__ (line 323) | def __init__(
method _prep_qkv (line 380) | def _prep_qkv(self,
method _wrap_up (line 405) | def _wrap_up(self,
method forward (line 424) | def forward(
class GlobalAttention (line 514) | class GlobalAttention(nn.Module):
method __init__ (line 515) | def __init__(self, c_in, c_hidden, no_heads, inf, eps):
method forward (line 539) | def forward(self,
function _lma (line 603) | def _lma(
function _flash_attn (line 666) | def _flash_attn(q, k, v, kv_mask):
FILE: vendor/openfold/openfold/model/structure_module.py
class AngleResnetBlock (line 47) | class AngleResnetBlock(nn.Module):
method __init__ (line 48) | def __init__(self, c_hidden):
method forward (line 63) | def forward(self, a: torch.Tensor) -> torch.Tensor:
class AngleResnet (line 75) | class AngleResnet(nn.Module):
method __init__ (line 80) | def __init__(self, c_in, c_hidden, no_blocks, no_angles, epsilon):
method forward (line 114) | def forward(
class InvariantPointAttention (line 161) | class InvariantPointAttention(nn.Module):
method __init__ (line 165) | def __init__(
method forward (line 231) | def forward(
class BackboneUpdate (line 434) | class BackboneUpdate(nn.Module):
method __init__ (line 439) | def __init__(self, c_s):
method forward (line 451) | def forward(self, s: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
class StructureModuleTransitionLayer (line 464) | class StructureModuleTransitionLayer(nn.Module):
method __init__ (line 465) | def __init__(self, c):
method forward (line 476) | def forward(self, s):
class StructureModuleTransition (line 489) | class StructureModuleTransition(nn.Module):
method __init__ (line 490) | def __init__(self, c, num_layers, dropout_rate):
method forward (line 505) | def forward(self, s):
class StructureModule (line 515) | class StructureModule(nn.Module):
method __init__ (line 516) | def __init__(
method forward (line 628) | def forward(
method _init_residue_constants (line 757) | def _init_residue_constants(self, float_dtype, device):
method torsion_angles_to_frames (line 802) | def torsion_angles_to_frames(self, r, alpha, f):
method frames_and_literature_positions_to_atom14_pos (line 808) | def frames_and_literature_positions_to_atom14_pos(
FILE: vendor/openfold/openfold/model/template.py
class TemplatePointwiseAttention (line 54) | class TemplatePointwiseAttention(nn.Module):
method __init__ (line 58) | def __init__(self, c_t, c_z, c_hidden, no_heads, inf, **kwargs):
method _chunk (line 85) | def _chunk(self,
method forward (line 105) | def forward(self,
class TemplatePairStackBlock (line 148) | class TemplatePairStackBlock(nn.Module):
method __init__ (line 149) | def __init__(
method forward (line 200) | def forward(self,
class TemplatePairStack (line 293) | class TemplatePairStack(nn.Module):
method __init__ (line 297) | def __init__(
method forward (line 353) | def forward(
function embed_templates_offload (line 413) | def embed_templates_offload(
function embed_templates_average (line 522) | def embed_templates_average(
FILE: vendor/openfold/openfold/model/torchscript.py
function script_preset_ (line 51) | def script_preset_(model: torch.nn.Module):
function _get_module_device (line 75) | def _get_module_device(module: torch.nn.Module) -> torch.device:
function _trace_module (line 88) | def _trace_module(module, batch_dims=None):
function _script_submodules_helper_ (line 149) | def _script_submodules_helper_(
function _trace_submodules_ (line 170) | def _trace_submodules_(
function script_submodules_ (line 183) | def script_submodules_(
FILE: vendor/openfold/openfold/model/triangular_attention.py
class TriangleAttention (line 31) | class TriangleAttention(nn.Module):
method __init__ (line 32) | def __init__(
method _chunk (line 61) | def _chunk(self,
method forward (line 88) | def forward(self,
class TriangleAttentionEndingNode (line 155) | class TriangleAttentionEndingNode(TriangleAttention):
FILE: vendor/openfold/openfold/model/triangular_multiplicative_update.py
class TriangleMultiplicativeUpdate (line 28) | class TriangleMultiplicativeUpdate(nn.Module):
method __init__ (line 32) | def __init__(self, c_z, c_hidden, _outgoing=True):
method _combine_projections (line 57) | def _combine_projections(self,
method _inference_forward (line 87) | def _inference_forward(self,
method forward (line 358) | def forward(self,
class TriangleMultiplicationOutgoing (line 419) | class TriangleMultiplicationOutgoing(TriangleMultiplicativeUpdate):
class TriangleMultiplicationIncoming (line 426) | class TriangleMultiplicationIncoming(TriangleMultiplicativeUpdate):
FILE: vendor/openfold/openfold/np/protein.py
class Protein (line 40) | class Protein:
function from_pdb_string (line 77) | def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Pro...
function from_proteinnet_string (line 174) | def from_proteinnet_string(proteinnet_str: str) -> Protein:
function get_pdb_headers (line 227) | def get_pdb_headers(prot: Protein, chain_id: int = 0) -> Sequence[str]:
function add_pdb_headers (line 249) | def add_pdb_headers(prot: Protein, pdb_str: str) -> str:
function to_pdb (line 299) | def to_pdb(prot: Protein) -> str:
function to_modelcif (line 394) | def to_modelcif(prot: Protein) -> str:
function ideal_atom_mask (line 522) | def ideal_atom_mask(prot: Protein) -> np.ndarray:
function from_prediction (line 538) | def from_prediction(
FILE: vendor/openfold/openfold/np/relax/amber_minimize.py
function will_restrain (line 40) | def will_restrain(atom: openmm_app.Atom, rset: str) -> bool:
function _add_restraints (line 49) | def _add_restraints(
function _openmm_minimize (line 79) | def _openmm_minimize(
function _get_pdb_string (line 118) | def _get_pdb_string(topology: openmm_app.Topology, positions: unit.Quant...
function _check_cleaned_atoms (line 125) | def _check_cleaned_atoms(pdb_cleaned_string: str, pdb_ref_string: str):
function _check_residues_are_well_defined (line 149) | def _check_residues_are_well_defined(prot: protein.Protein):
function _check_atom_mask_is_ideal (line 159) | def _check_atom_mask_is_ideal(prot):
function clean_protein (line 166) | def clean_protein(prot: protein.Protein, checks: bool = True):
function make_atom14_positions (line 203) | def make_atom14_positions(prot):
function find_violations (line 359) | def find_violations(prot_np: protein.Protein):
function get_violation_metrics (line 398) | def get_violation_metrics(prot: protein.Protein):
function _run_one_iteration (line 411) | def _run_one_iteration(
function run_pipeline (line 474) | def run_pipeline(
function get_initial_energies (line 568) | def get_initial_energies(
FILE: vendor/openfold/openfold/np/relax/cleanup.py
function fix_pdb (line 27) | def fix_pdb(pdbfile, alterations_info):
function clean_structure (line 64) | def clean_structure(pdb_structure, alterations_info):
function _remove_heterogens (line 75) | def _remove_heterogens(fixer, alterations_info, keep_water):
function _replace_met_se (line 97) | def _replace_met_se(pdb_structure, alterations_info):
function _remove_chains_of_length_one (line 111) | def _remove_chains_of_length_one(pdb_structure, alterations_info):
FILE: vendor/openfold/openfold/np/relax/relax.py
class AmberRelaxation (line 23) | class AmberRelaxation(object):
method __init__ (line 25) | def __init__(
method process (line 59) | def process(
FILE: vendor/openfold/openfold/np/relax/utils.py
function overwrite_pdb_coordinates (line 25) | def overwrite_pdb_coordinates(pdb_str: str, pos) -> str:
function overwrite_b_factors (line 34) | def overwrite_b_factors(pdb_str: str, bfactors: np.ndarray) -> str:
function assert_equal_nonterminal_atom_types (line 76) | def assert_equal_nonterminal_atom_types(
FILE: vendor/openfold/openfold/np/residue_constants.py
function load_stereo_chemical_props (line 441) | def load_stereo_chemical_props() -> Tuple[
function sequence_to_onehot (line 886) | def sequence_to_onehot(
function _make_standard_atom_mask (line 1039) | def _make_standard_atom_mask() -> np.ndarray:
function chi_angle_atom (line 1057) | def chi_angle_atom(atom_index: int) -> np.ndarray:
function _make_rigid_transformation_4x4 (line 1104) | def _make_rigid_transformation_4x4(ex, ey, translation):
function _make_rigid_group_constants (line 1135) | def _make_rigid_group_constants():
function make_atom14_dists_bounds (line 1220) | def make_atom14_dists_bounds(
function _make_atom14_ambiguity_feats (line 1289) | def _make_atom14_ambiguity_feats():
function aatype_to_str_sequence (line 1308) | def aatype_to_str_sequence(aatype):
FILE: vendor/openfold/openfold/utils/argparse.py
class ArgparseAlphabetizer (line 4) | class ArgparseAlphabetizer(HelpFormatter):
method sort_actions (line 10) | def sort_actions(actions):
method add_arguments (line 14) | def add_arguments(self, actions):
method add_usage (line 19) | def add_usage(self, usage, actions, groups, prefix=None):
function remove_arguments (line 25) | def remove_arguments(parser, args):
FILE: vendor/openfold/openfold/utils/callbacks.py
class EarlyStoppingVerbose (line 4) | class EarlyStoppingVerbose(EarlyStopping):
method _evalute_stopping_criteria (line 9) | def _evalute_stopping_criteria(self, *args, **kwargs):
FILE: vendor/openfold/openfold/utils/checkpointing.py
function get_checkpoint_fn (line 29) | def get_checkpoint_fn():
function checkpoint_blocks (line 43) | def checkpoint_blocks(
FILE: vendor/openfold/openfold/utils/chunk_utils.py
function _fetch_dims (line 27) | def _fetch_dims(tree):
function _flat_idx_to_idx (line 45) | def _flat_idx_to_idx(
function _get_minimal_slice_set (line 58) | def _get_minimal_slice_set(
function _chunk_slice (line 176) | def _chunk_slice(
function chunk_layer (line 212) | def chunk_layer(
class ChunkSizeTuner (line 342) | class ChunkSizeTuner:
method __init__ (line 343) | def __init__(self,
method _determine_favorable_chunk_size (line 352) | def _determine_favorable_chunk_size(self, fn, args, min_chunk_size):
method _compare_arg_caches (line 383) | def _compare_arg_caches(self, ac1, ac2):
method tune_chunk_size (line 402) | def tune_chunk_size(self,
FILE: vendor/openfold/openfold/utils/exponential_moving_average.py
class ExponentialMovingAverage (line 9) | class ExponentialMovingAverage:
method __init__ (line 21) | def __init__(self, model: nn.Module, decay: float):
method to (line 37) | def to(self, device):
method _update_state_dict_ (line 41) | def _update_state_dict_(self, update, state_dict):
method update (line 52) | def update(self, model: torch.nn.Module) -> None:
method load_state_dict (line 60) | def load_state_dict(self, state_dict: OrderedDict) -> None:
method state_dict (line 65) | def state_dict(self) -> OrderedDict:
FILE: vendor/openfold/openfold/utils/feats.py
function pseudo_beta_fn (line 34) | def pseudo_beta_fn(aatype, all_atom_positions, all_atom_masks):
function atom14_to_atom37 (line 55) | def atom14_to_atom37(atom14, batch):
function build_template_angle_feat (line 68) | def build_template_angle_feat(template_feats):
function build_template_pair_feat (line 92) | def build_template_pair_feat(
function build_extra_msa_feat (line 162) | def build_extra_msa_feat(batch):
function torsion_angles_to_frames (line 172) | def torsion_angles_to_frames(
function frames_and_literature_positions_to_atom14_pos (line 238) | def frames_and_literature_positions_to_atom14_pos(
FILE: vendor/openfold/openfold/utils/import_weights.py
class ParamType (line 28) | class ParamType(Enum):
method __init__ (line 44) | def __init__(self, fn):
class Param (line 49) | class Param:
function process_translation_dict (line 55) | def process_translation_dict(d, top_layer=True):
function stacked (line 74) | def stacked(param_dict_list, out=None):
function assign (line 103) | def assign(translation_dict, orig_weights):
function generate_translation_dict (line 125) | def generate_translation_dict(model, version):
function import_jax_weights_ (line 435) | def import_jax_weights_(model, npz_path, version="model_1"):
FILE: vendor/openfold/openfold/utils/kernel/attention_core.py
class AttentionCoreFunction (line 26) | class AttentionCoreFunction(torch.autograd.Function):
method forward (line 28) | def forward(ctx, q, k, v, bias_1=None, bias_2=None):
method backward (line 62) | def backward(ctx, grad_output):
FILE: vendor/openfold/openfold/utils/kernel/csrc/softmax_cuda.cpp
function PYBIND11_MODULE (line 33) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: vendor/openfold/openfold/utils/kernel/csrc/softmax_cuda_stub.cpp
function attn_softmax_inplace_forward_ (line 19) | void attn_softmax_inplace_forward_(
function attn_softmax_inplace_backward_ (line 26) | void attn_softmax_inplace_backward_(
FILE: vendor/openfold/openfold/utils/logger.py
function is_main_process (line 25) | def is_main_process():
class PerformanceLoggingCallback (line 29) | class PerformanceLoggingCallback(Callback):
method __init__ (line 30) | def __init__(self, log_file, global_batch_size, warmup_steps: int = 0,...
method do_step (line 38) | def do_step(self):
method on_train_batch_start (line 45) | def on_train_batch_start(self, trainer, pl_module, batch, batch_idx, d...
method on_test_batch_start (line 48) | def on_test_batch_start(self, trainer, pl_module, batch, batch_idx, da...
method process_performance_stats (line 51) | def process_performance_stats(self, deltas):
method _log (line 66) | def _log(self):
method on_train_end (line 74) | def on_train_end(self, trainer, pl_module):
method on_epoch_end (line 79) | def on_epoch_end(self, trainer, pl_module):
FILE: vendor/openfold/openfold/utils/loss.py
function softmax_cross_entropy (line 37) | def softmax_cross_entropy(logits, labels):
function sigmoid_cross_entropy (line 45) | def sigmoid_cross_entropy(logits, labels):
function torsion_angle_loss (line 58) | def torsion_angle_loss(
function compute_fape (line 82) | def compute_fape(
function backbone_loss (line 156) | def backbone_loss(
function sidechain_loss (line 214) | def sidechain_loss(
function fape_loss (line 264) | def fape_loss(
function supervised_chi_loss (line 288) | def supervised_chi_loss(
function compute_plddt (line 374) | def compute_plddt(logits: torch.Tensor) -> torch.Tensor:
function lddt (line 388) | def lddt(
function lddt_ca (line 444) | def lddt_ca(
function lddt_loss (line 467) | def lddt_loss(
function distogram_loss (line 519) | def distogram_loss(
function _calculate_bin_centers (line 567) | def _calculate_bin_centers(boundaries: torch.Tensor):
function _calculate_expected_aligned_error (line 576) | def _calculate_expected_aligned_error(
function compute_predicted_aligned_error (line 587) | def compute_predicted_aligned_error(
function compute_tm (line 627) | def compute_tm(
function tm_loss (line 661) | def tm_loss(
function between_residue_bond_loss (line 718) | def between_residue_bond_loss(
function between_residue_clash_loss (line 877) | def between_residue_clash_loss(
function within_residue_violations (line 1024) | def within_residue_violations(
function find_structural_violations (line 1111) | def find_structural_violations(
function find_structural_violations_np (line 1224) | def find_structural_violations_np(
function extreme_ca_ca_distance_violations (line 1241) | def extreme_ca_ca_distance_violations(
function compute_violation_metrics (line 1278) | def compute_violation_metrics(
function compute_violation_metrics_np (line 1321) | def compute_violation_metrics_np(
function violation_loss (line 1337) | def violation_loss(
function compute_renamed_ground_truth (line 1362) | def compute_renamed_ground_truth(
function experimentally_resolved_loss (line 1470) | def experimentally_resolved_loss(
function masked_msa_loss (line 1494) | def masked_msa_loss(logits, true_msa, bert_mask, eps=1e-8, **kwargs):
class AlphaFoldLoss (line 1527) | class AlphaFoldLoss(nn.Module):
method __init__ (line 1529) | def __init__(self, config):
method forward (line 1533) | def forward(self, out, batch, _return_breakdown=False):
FILE: vendor/openfold/openfold/utils/lr_schedulers.py
class AlphaFoldLRScheduler (line 4) | class AlphaFoldLRScheduler(torch.optim.lr_scheduler._LRScheduler):
method __init__ (line 13) | def __init__(self,
method state_dict (line 54) | def state_dict(self):
method load_state_dict (line 61) | def load_state_dict(self, state_dict):
method get_lr (line 64) | def get_lr(self):
FILE: vendor/openfold/openfold/utils/precision_utils.py
function is_fp16_enabled (line 18) | def is_fp16_enabled():
FILE: vendor/openfold/openfold/utils/rigid_utils.py
function rot_matmul (line 24) | def rot_matmul(
function rot_vec_mul (line 64) | def rot_vec_mul(
function identity_rot_mats (line 89) | def identity_rot_mats(
function identity_trans (line 106) | def identity_trans(
function identity_quats (line 122) | def identity_quats(
function _to_mat (line 146) | def _to_mat(pairs):
function quat_to_rot (line 168) | def quat_to_rot(quat: torch.Tensor) -> torch.Tensor:
function rot_to_quat (line 191) | def rot_to_quat(
function _get_quat (line 243) | def _get_quat(quat_key, dtype, device):
function quat_multiply (line 247) | def quat_multiply(quat1, quat2):
function quat_multiply_by_vec (line 259) | def quat_multiply_by_vec(quat, vec):
function invert_rot_mat (line 271) | def invert_rot_mat(rot_mat: torch.Tensor):
function invert_quat (line 275) | def invert_quat(quat: torch.Tensor):
class Rotation (line 282) | class Rotation:
method __init__ (line 292) | def __init__(self,
method identity (line 331) | def identity(
method __getitem__ (line 371) | def __getitem__(self, index: Any) -> Rotation:
method __mul__ (line 394) | def __mul__(self,
method __rmul__ (line 419) | def __rmul__(self,
method shape (line 436) | def shape(self) -> torch.Size:
method dtype (line 456) | def dtype(self) -> torch.dtype:
method device (line 471) | def device(self) -> torch.device:
method requires_grad (line 486) | def requires_grad(self) -> bool:
method get_rot_mats (line 500) | def get_rot_mats(self) -> torch.Tensor:
method get_quats (line 516) | def get_quats(self) -> torch.Tensor:
method get_cur_rot (line 535) | def get_cur_rot(self) -> torch.Tensor:
method compose_q_update_vec (line 551) | def compose_q_update_vec(self,
method compose_r (line 578) | def compose_r(self, r: Rotation) -> Rotation:
method compose_q (line 594) | def compose_q(self, r: Rotation, normalize_quats: bool = True) -> Rota...
method apply (line 615) | def apply(self, pts: torch.Tensor) -> torch.Tensor:
method invert_apply (line 629) | def invert_apply(self, pts: torch.Tensor) -> torch.Tensor:
method invert (line 643) | def invert(self) -> Rotation:
method unsqueeze (line 666) | def unsqueeze(self,
method cat (line 691) | def cat(
method map_tensor_fn (line 716) | def map_tensor_fn(self,
method cuda (line 745) | def cuda(self) -> Rotation:
method to (line 763) | def to(self,
method detach (line 792) | def detach(self) -> Rotation:
class Rigid (line 813) | class Rigid:
method __init__ (line 820) | def __init__(self,
method identity (line 865) | def identity(
method __getitem__ (line 892) | def __getitem__(self,
method __mul__ (line 923) | def __mul__(self,
method __rmul__ (line 944) | def __rmul__(self,
method shape (line 960) | def shape(self) -> torch.Size:
method device (line 972) | def device(self) -> torch.device:
method get_rots (line 981) | def get_rots(self) -> Rotation:
method get_trans (line 990) | def get_trans(self) -> torch.Tensor:
method compose_q_update_vec (line 999) | def compose_q_update_vec(self,
method compose (line 1021) | def compose(self,
method apply (line 1037) | def apply(self,
method invert_apply (line 1051) | def invert_apply(self,
method invert (line 1065) | def invert(self) -> Rigid:
method map_tensor_fn (line 1077) | def map_tensor_fn(self,
method to_tensor_4x4 (line 1099) | def to_tensor_4x4(self) -> torch.Tensor:
method from_tensor_4x4 (line 1113) | def from_tensor_4x4(
method to_tensor_7 (line 1133) | def to_tensor_7(self) -> torch.Tensor:
method from_tensor_7 (line 1148) | def from_tensor_7(
method from_3_points (line 1166) | def from_3_points(
method unsqueeze (line 1210) | def unsqueeze(self,
method cat (line 1230) | def cat(
method apply_rot_fn (line 1253) | def apply_rot_fn(self, fn: Callable[Rotation, Rotation]) -> Rigid:
method apply_trans_fn (line 1265) | def apply_trans_fn(self, fn: Callable[torch.Tensor, torch.Tensor]) -> ...
method scale_translation (line 1278) | def scale_translation(self, trans_scale_factor: float) -> Rigid:
method stop_rot_gradient (line 1291) | def stop_rot_gradient(self) -> Rigid:
method make_transform_from_reference (line 1302) | def make_transform_from_reference(n_xyz, ca_xyz, c_xyz, eps=1e-20):
method cuda (line 1374) | def cuda(self) -> Rigid:
FILE: vendor/openfold/openfold/utils/script_utils.py
function count_models_to_evaluate (line 26) | def count_models_to_evaluate(openfold_checkpoint_path, jax_param_path):
function get_model_basename (line 35) | def get_model_basename(model_path):
function make_output_directory (line 43) | def make_output_directory(output_dir, model_name, multiple_model_mode):
function load_models_from_command_line (line 52) | def load_models_from_command_line(config, model_device, openfold_checkpo...
function parse_fasta (line 117) | def parse_fasta(data):
function update_timings (line 130) | def update_timings(timing_dict, output_file=os.path.join(os.getcwd(), "t...
function run_model (line 149) | def run_model(model, batch, tag, output_dir):
function prep_output (line 169) | def prep_output(out, batch, feature_dict, feature_processor, config_pres...
function relax_protein (line 231) | def relax_protein(config, model_device, unrelaxed_protein, output_direct...
FILE: vendor/openfold/openfold/utils/seed.py
function seed_globally (line 10) | def seed_globally(seed=None):
FILE: vendor/openfold/openfold/utils/superimposition.py
function _superimpose_np (line 19) | def _superimpose_np(reference, coords):
function _superimpose_single (line 37) | def _superimpose_single(reference, coords):
function superimpose (line 44) | def superimpose(reference, coords, mask):
FILE: vendor/openfold/openfold/utils/suppress_output.py
class SuppressStdout (line 5) | class SuppressStdout:
method __enter__ (line 6) | def __enter__(self):
method __exit__ (line 11) | def __exit__(self, typ, value, traceback):
class SuppressLogging (line 17) | class SuppressLogging:
method __init__ (line 18) | def __init__(self, level):
method __enter__ (line 21) | def __enter__(self):
method __exit__ (line 24) | def __exit__(self, typ, value, traceback):
FILE: vendor/openfold/openfold/utils/tensor_utils.py
function add (line 24) | def add(m1, m2, inplace):
function permute_final_dims (line 35) | def permute_final_dims(tensor: torch.Tensor, inds: List[int]):
function flatten_final_dims (line 41) | def flatten_final_dims(t: torch.Tensor, no_dims: int):
function masked_mean (line 45) | def masked_mean(mask, value, dim, eps=1e-4):
function pts_to_distogram (line 50) | def pts_to_distogram(pts, min_bin=2.3125, max_bin=21.6875, no_bins=64):
function dict_multimap (line 60) | def dict_multimap(fn, dicts):
function one_hot (line 73) | def one_hot(x, v_bins):
function batched_gather (line 80) | def batched_gather(data, inds, dim=0, no_batch_dims=0):
function dict_map (line 96) | def dict_map(fn, dic, leaf_type):
function tree_map (line 107) | def tree_map(fn, tree, leaf_type):
FILE: vendor/openfold/openfold/utils/trace_utils.py
function pad_feature_dict_seq (line 23) | def pad_feature_dict_seq(feature_dict, seqlen):
function trace_model_ (line 61) | def trace_model_(model, sample_input):
FILE: vendor/openfold/openfold/utils/validation_metrics.py
function drmsd (line 17) | def drmsd(structure_1, structure_2, mask=None):
function drmsd_np (line 39) | def drmsd_np(structure_1, structure_2, mask=None):
function gdt (line 48) | def gdt(p1, p2, mask, cutoffs):
function gdt_ts (line 63) | def gdt_ts(p1, p2, mask):
function gdt_ha (line 67) | def gdt_ha(p1, p2, mask):
FILE: vendor/openfold/run_pretrained_openfold.py
function precompute_alignments (line 65) | def precompute_alignments(tags, seqs, alignment_dir, args):
function round_up_seqlen (line 112) | def round_up_seqlen(seqlen):
function generate_feature_dict (line 116) | def generate_feature_dict(
function list_files_with_extensions (line 150) | def list_files_with_extensions(dir, extensions):
function main (line 154) | def main(args):
FILE: vendor/openfold/scripts/alignment_db_scripts/create_alignment_db.py
function main (line 6) | def main(args):
FILE: vendor/openfold/scripts/alignment_db_scripts/unify_alignment_db_indices.py
function main (line 9) | def main(args):
FILE: vendor/openfold/scripts/convert_of_weights_to_jax.py
function reshape_fn (line 33) | def reshape_fn(of_param, af_weight):
function transfer (line 51) | def transfer(of_dict, af_weight_template):
function main (line 61) | def main(args):
FILE: vendor/openfold/scripts/data_dir_to_fasta.py
function main (line 9) | def main(args):
FILE: vendor/openfold/scripts/download_cameo.py
function generate_url (line 21) | def generate_url(period, end_date):
function main (line 32) | def main(args):
FILE: vendor/openfold/scripts/generate_alphafold_feature_dict.py
function main (line 10) | def main(args):
FILE: vendor/openfold/scripts/generate_chain_data_cache.py
function parse_file (line 17) | def parse_file(
function main (line 70) | def main(args):
FILE: vendor/openfold/scripts/generate_mmcif_cache.py
function parse_file (line 16) | def parse_file(f, args):
function main (line 40) | def main(args):
FILE: vendor/openfold/scripts/precompute_alignments.py
function run_seq_group_alignments (line 22) | def run_seq_group_alignments(seq_groups, alignment_runner, args):
function parse_and_align (line 64) | def parse_and_align(files, alignment_runner, args):
function main (line 115) | def main(args):
FILE: vendor/openfold/scripts/precompute_alignments_mmseqs.py
function _split_a3ms (line 10) | def _split_a3ms(output_dir):
function main (line 35) | def main(args):
FILE: vendor/openfold/scripts/precompute_embeddings.py
class SequenceDataset (line 12) | class SequenceDataset(object):
method __init__ (line 13) | def __init__(self, labels, sequences) -> None:
method from_file (line 18) | def from_file(cls, fasta_file):
method __len__ (line 30) | def __len__(self):
method __getitem__ (line 33) | def __getitem__(self, idx):
method get_batch_indices (line 36) | def get_batch_indices(self, toks_per_batch, extra_toks_per_seq):
class EmbeddingGenerator (line 62) | class EmbeddingGenerator:
method __init__ (line 64) | def __init__(self,
method parse_sequences (line 83) | def parse_sequences(self, fasta_dir, output_dir):
method run (line 108) | def run(
function main (line 151) | def main(args):
FILE: vendor/openfold/scripts/prep_proteinnet_msas.py
function main (line 7) | def main(args):
FILE: vendor/openfold/scripts/unpack_proteinnet.py
function _write_file (line 6) | def _write_file(args, file_in_progress):
function main (line 14) | def main(args):
FILE: vendor/openfold/scripts/utils.py
function add_data_args (line 7) | def add_data_args(parser: argparse.ArgumentParser):
function get_nvidia_cc (line 47) | def get_nvidia_cc():
FILE: vendor/openfold/scripts/zero_to_fp32.py
function get_model_state_file (line 29) | def get_model_state_file(checkpoint_dir, zero_stage):
function get_optim_files (line 45) | def get_optim_files(checkpoint_dir):
function parse_model_state (line 56) | def parse_model_state(file):
function parse_optim_states (line 74) | def parse_optim_states(files, ds_checkpoint_dir):
function _get_fp32_state_dict_from_zero_checkpoint (line 127) | def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir):
function _get_fp32_state_dict_from_zero2_checkpoint (line 157) | def _get_fp32_state_dict_from_zero2_checkpoint(world_size,
function zero3_partitioned_param_info (line 252) | def zero3_partitioned_param_info(unpartitioned_numel, world_size):
function _get_fp32_state_dict_from_zero3_checkpoint (line 259) | def _get_fp32_state_dict_from_zero3_checkpoint(world_size,
function get_fp32_state_dict_from_zero_checkpoint (line 332) | def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None):
function convert_zero_checkpoint_to_fp32_state_dict (line 381) | def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_fi...
function load_state_dict_from_zero_checkpoint (line 397) | def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
function get_global_step_from_zero_checkpoint (line 435) | def get_global_step_from_zero_checkpoint(checkpoint_dir):
FILE: vendor/openfold/setup.py
function get_cuda_bare_metal_version (line 40) | def get_cuda_bare_metal_version(cuda_dir):
FILE: vendor/openfold/tests/compare_utils.py
function alphafold_is_installed (line 22) | def alphafold_is_installed():
function skip_unless_alphafold_installed (line 26) | def skip_unless_alphafold_installed():
function import_alphafold (line 30) | def import_alphafold():
function get_alphafold_config (line 48) | def get_alphafold_config():
function get_global_pretrained_openfold (line 58) | def get_global_pretrained_openfold():
function _get_orig_weights (line 77) | def _get_orig_weights():
function _remove_key_prefix (line 85) | def _remove_key_prefix(d, prefix):
function fetch_alphafold_module_weights (line 92) | def fetch_alphafold_module_weights(weight_path):
FILE: vendor/openfold/tests/data_utils.py
function random_template_feats (line 19) | def random_template_feats(n_templ, n, batch_size=None):
function random_extra_msa_feats (line 45) | def random_extra_msa_feats(n_extra, n, batch_size=None):
function random_affines_vector (line 66) | def random_affines_vector(dim):
function random_affines_4x4 (line 82) | def random_affines_4x4(dim):
FILE: vendor/openfold/tests/test_data_pipeline.py
class TestDataPipeline (line 38) | class TestDataPipeline(unittest.TestCase):
method test_fasta_compare (line 40) | def test_fasta_compare(self):
FILE: vendor/openfold/tests/test_data_transforms.py
class TestDataTransforms (line 19) | class TestDataTransforms(unittest.TestCase):
method test_make_seq_mask (line 20) | def test_make_seq_mask(self):
method test_add_distillation_flag (line 30) | def test_add_distillation_flag(self):
method test_make_all_atom_aatype (line 36) | def test_make_all_atom_aatype(self):
method test_fix_templates_aatype (line 46) | def test_fix_templates_aatype(self):
method test_correct_msa_restypes (line 57) | def test_correct_msa_restypes(self):
method test_squeeze_features (line 65) | def test_squeeze_features(self):
method test_randomly_replace_msa_with_unknown (line 91) | def test_randomly_replace_msa_with_unknown(self):
method test_sample_msa (line 103) | def test_sample_msa(self):
method test_crop_extra_msa (line 121) | def test_crop_extra_msa(self):
method test_delete_extra_msa (line 134) | def test_delete_extra_msa(self):
method test_nearest_neighbor_clusters (line 144) | def test_nearest_neighbor_clusters(self):
method test_make_msa_mask (line 155) | def test_make_msa_mask(self):
method test_make_hhblits_profile (line 165) | def test_make_hhblits_profile(self):
method test_make_masked_msa (line 174) | def test_make_masked_msa(self):
method test_make_msa_feat (line 192) | def test_make_msa_feat(self):
method test_crop_templates (line 206) | def test_crop_templates(self):
method test_make_atom14_masks (line 217) | def test_make_atom14_masks(self):
FILE: vendor/openfold/tests/test_embedders.py
class TestInputEmbedder (line 27) | class TestInputEmbedder(unittest.TestCase):
method test_shape (line 28) | def test_shape(self):
class TestPreembeddingEmbedder (line 50) | class TestPreembeddingEmbedder(unittest.TestCase):
method test_shape (line 51) | def test_shape(self):
class TestRecyclingEmbedder (line 72) | class TestRecyclingEmbedder(unittest.TestCase):
method test_shape (line 73) | def test_shape(self):
class TestTemplateAngleEmbedder (line 94) | class TestTemplateAngleEmbedder(unittest.TestCase):
method test_shape (line 95) | def test_shape(self):
class TestTemplatePairEmbedder (line 113) | class TestTemplatePairEmbedder(unittest.TestCase):
method test_shape (line 114) | def test_shape(self):
FILE: vendor/openfold/tests/test_evoformer.py
class TestEvoformerStack (line 33) | class TestEvoformerStack(unittest.TestCase):
method test_shape (line 34) | def test_shape(self):
method test_shape_without_column_attention (line 90) | def test_shape_without_column_attention(self):
method test_compare (line 147) | def test_compare(self):
class TestExtraMSAStack (line 217) | class TestExtraMSAStack(unittest.TestCase):
method test_shape (line 218) | def test_shape(self):
class TestMSATransition (line 285) | class TestMSATransition(unittest.TestCase):
method test_shape (line 286) | def test_shape(self):
method test_compare (line 304) | def test_compare(self):
FILE: vendor/openfold/tests/test_feats.py
class TestFeats (line 42) | class TestFeats(unittest.TestCase):
method test_pseudo_beta_fn_compare (line 44) | def test_pseudo_beta_fn_compare(self):
method test_atom37_to_torsion_angles_compare (line 82) | def test_atom37_to_torsion_angles_compare(self):
method test_atom37_to_frames_compare (line 132) | def test_atom37_to_frames_compare(self):
method test_torsion_angles_to_frames_shape (line 185) | def test_torsion_angles_to_frames_shape(self):
method test_torsion_angles_to_frames_compare (line 207) | def test_torsion_angles_to_frames_compare(self):
method test_frames_and_literature_positions_to_atom14_pos_shape (line 261) | def test_frames_and_literature_positions_to_atom14_pos_shape(self):
method test_frames_and_literature_positions_to_atom14_pos_compare (line 283) | def test_frames_and_literature_positions_to_atom14_pos_compare(self):
FILE: vendor/openfold/tests/test_import_weights.py
class TestImportWeights (line 24) | class TestImportWeights(unittest.TestCase):
method test_import_jax_weights_ (line 25) | def test_import_jax_weights_(self):
FILE: vendor/openfold/tests/test_kernels.py
class TestAttentionCore (line 11) | class TestAttentionCore(unittest.TestCase):
method test_attention_core_forward (line 12) | def test_attention_core_forward(self):
method test_attention_core_backward (line 30) | def test_attention_core_backward(self):
FILE: vendor/openfold/tests/test_loss.py
function affine_vector_to_4x4 (line 62) | def affine_vector_to_4x4(affine):
class TestLoss (line 67) | class TestLoss(unittest.TestCase):
method test_run_torsion_angle_loss (line 68) | def test_run_torsion_angle_loss(self):
method test_run_fape (line 78) | def test_run_fape(self):
method test_run_between_residue_bond_loss (line 105) | def test_run_between_residue_bond_loss(self):
method test_between_residue_bond_loss_compare (line 128) | def test_between_residue_bond_loss_compare(self):
method test_run_between_residue_clash_loss (line 169) | def test_run_between_residue_clash_loss(self):
method test_between_residue_clash_loss_compare (line 186) | def test_between_residue_clash_loss_compare(self):
method test_compute_plddt_compare (line 231) | def test_compute_plddt_compare(self):
method test_find_structural_violations (line 245) | def test_find_structural_violations(self):
method test_find_structural_violations_compare (line 265) | def test_find_structural_violations_compare(self):
method test_compute_renamed_ground_truth_compare (line 318) | def test_compute_renamed_ground_truth_compare(self):
method test_msa_loss_compare (line 369) | def test_msa_loss_compare(self):
method test_distogram_loss_compare (line 409) | def test_distogram_loss_compare(self):
method test_experimentally_resolved_loss_compare (line 460) | def test_experimentally_resolved_loss_compare(self):
method test_supervised_chi_loss_compare (line 504) | def test_supervised_chi_loss_compare(self):
method test_violation_loss_compare (line 565) | def test_violation_loss_compare(self):
method test_lddt_loss_compare (line 620) | def test_lddt_loss_compare(self):
method test_backbone_loss_compare (line 674) | def test_backbone_loss_compare(self):
method test_sidechain_loss_compare (line 724) | def test_sidechain_loss_compare(self):
method test_tm_loss_compare (line 819) | def test_tm_loss_compare(self):
FILE: vendor/openfold/tests/test_model.py
class TestModel (line 38) | class TestModel(unittest.TestCase):
method test_dry_run (line 39) | def test_dry_run(self):
method test_dry_run_seqemb_mode (line 80) | def test_dry_run_seqemb_mode(self):
method test_compare (line 121) | def test_compare(self):
FILE: vendor/openfold/tests/test_msa.py
class TestMSARowAttentionWithPairBias (line 33) | class TestMSARowAttentionWithPairBias(unittest.TestCase):
method test_shape (line 34) | def test_shape(self):
method test_compare (line 56) | def test_compare(self):
class TestMSAColumnAttention (line 102) | class TestMSAColumnAttention(unittest.TestCase):
method test_shape (line 103) | def test_shape(self):
method test_compare (line 122) | def test_compare(self):
class TestMSAColumnGlobalAttention (line 164) | class TestMSAColumnGlobalAttention(unittest.TestCase):
method test_shape (line 165) | def test_shape(self):
method test_compare (line 184) | def test_compare(self):
FILE: vendor/openfold/tests/test_outer_product_mean.py
class TestOuterProductMean (line 29) | class TestOuterProductMean(unittest.TestCase):
method test_shape (line 30) | def test_shape(self):
method test_opm_compare (line 49) | def test_opm_compare(self):
FILE: vendor/openfold/tests/test_pair_transition.py
class TestPairTransition (line 29) | class TestPairTransition(unittest.TestCase):
method test_shape (line 30) | def test_shape(self):
method test_compare (line 48) | def test_compare(self):
FILE: vendor/openfold/tests/test_primitives.py
class TestLMA (line 25) | class TestLMA(unittest.TestCase):
method test_lma_vs_attention (line 26) | def test_lma_vs_attention(self):
FILE: vendor/openfold/tests/test_structure_module.py
class TestStructureModule (line 48) | class TestStructureModule(unittest.TestCase):
method test_structure_module_shape (line 49) | def test_structure_module_shape(self):
method test_structure_module_transition_shape (line 100) | def test_structure_module_transition_shape(self):
method test_structure_module_compare (line 118) | def test_structure_module_compare(self):
class TestInvariantPointAttention (line 183) | class TestInvariantPointAttention(unittest.TestCase):
method test_shape (line 184) | def test_shape(self):
method test_ipa_compare (line 215) | def test_ipa_compare(self):
class TestAngleResnet (line 271) | class TestAngleResnet(unittest.TestCase):
method test_shape (line 272) | def test_shape(self):
FILE: vendor/openfold/tests/test_template.py
class TestTemplatePointwiseAttention (line 33) | class TestTemplatePointwiseAttention(unittest.TestCase):
method test_shape (line 34) | def test_shape(self):
class TestTemplatePairStack (line 56) | class TestTemplatePairStack(unittest.TestCase):
method test_shape (line 57) | def test_shape(self):
method test_compare (line 95) | def test_compare(self):
class Template (line 145) | class Template(unittest.TestCase):
method test_compare (line 147) | def test_compare(self):
FILE: vendor/openfold/tests/test_triangular_attention.py
class TestTriangularAttention (line 31) | class TestTriangularAttention(unittest.TestCase):
method test_shape (line 32) | def test_shape(self):
method _tri_att_compare (line 50) | def _tri_att_compare(self, starting=False):
method test_tri_att_end_compare (line 108) | def test_tri_att_end_compare(self):
method test_tri_att_start_compare (line 112) | def test_tri_att_start_compare(self):
FILE: vendor/openfold/tests/test_triangular_multiplicative_update.py
class TestTriangularMultiplicativeUpdate (line 29) | class TestTriangularMultiplicativeUpdate(unittest.TestCase):
method test_shape (line 30) | def test_shape(self):
method _tri_mul_compare (line 50) | def _tri_mul_compare(self, incoming=False):
method test_tri_mul_out_compare (line 101) | def test_tri_mul_out_compare(self):
method test_tri_mul_in_compare (line 105) | def test_tri_mul_in_compare(self):
method _tri_mul_inplace (line 108) | def _tri_mul_inplace(self, incoming=False):
method test_tri_mul_out_inference (line 137) | def test_tri_mul_out_inference(self):
method test_tri_mul_in_inference (line 140) | def test_tri_mul_in_inference(self):
FILE: vendor/openfold/tests/test_utils.py
class TestUtils (line 53) | class TestUtils(unittest.TestCase):
method test_rigid_from_3_points_shape (line 54) | def test_rigid_from_3_points_shape(self):
method test_rigid_from_4x4 (line 69) | def test_rigid_from_4x4(self):
method test_rigid_shape (line 91) | def test_rigid_shape(self):
method test_rigid_cat (line 101) | def test_rigid_cat(self):
method test_rigid_compose (line 126) | def test_rigid_compose(self):
method test_rigid_apply (line 151) | def test_rigid_apply(self):
method test_quat_to_rot (line 171) | def test_quat_to_rot(self):
method test_rot_to_quat (line 178) | def test_rot_to_quat(self):
method test_chunk_layer_tensor (line 184) | def test_chunk_layer_tensor(self):
method test_chunk_layer_dict (line 192) | def test_chunk_layer_dict(self):
method test_chunk_slice_dict (line 209) | def test_chunk_slice_dict(self):
method test_pre_compose_compare (line 225) | def test_pre_compose_compare(self):
FILE: vendor/openfold/thread_sequence.py
function main (line 36) | def main(args):
FILE: vendor/openfold/train_openfold.py
class OpenFoldWrapper (line 51) | class OpenFoldWrapper(pl.LightningModule):
method __init__ (line 52) | def __init__(self, config):
method forward (line 64) | def forward(self, batch):
method _log (line 67) | def _log(self, loss_breakdown, batch, outputs, train=True):
method training_step (line 97) | def training_step(self, batch, batch_idx):
method on_before_zero_grad (line 117) | def on_before_zero_grad(self, *args, **kwargs):
method validation_step (line 120) | def validation_step(self, batch, batch_idx):
method validation_epoch_end (line 142) | def validation_epoch_end(self, _):
method _compute_validation_metrics (line 147) | def _compute_validation_metrics(self,
method configure_optimizers (line 201) | def configure_optimizers(self,
method on_load_checkpoint (line 235) | def on_load_checkpoint(self, checkpoint):
method on_save_checkpoint (line 241) | def on_save_checkpoint(self, checkpoint):
method resume_last_lr_step (line 244) | def resume_last_lr_step(self, lr_step):
method load_from_jax (line 247) | def load_from_jax(self, jax_path):
function main (line 259) | def main(args):
function bool_type (line 385) | def bool_type(bool_str: str):
Copy disabled (too large)
Download .json
Condensed preview — 319 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (13,189K chars).
[
{
"path": ".gitignore",
"chars": 2228,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": ".pre-commit-config.yaml",
"chars": 3329,
"preview": "default_language_version:\n python: python3.9\n\nexclude: ^(docs/|build/|node_modules/|venv/|\\.venv/|vendor/)\nrepos:\n - r"
},
{
"path": "LICENSE",
"chars": 11358,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 32475,
"preview": "<!-- <div align=\"center\"> -->\n<!-- omit in toc -->\n# The Family of Diffusion Protein Language Models (DPLM)\n<a href=\"htt"
},
{
"path": "analysis/TMalign.cpp",
"chars": 182097,
"preview": "/* TM-align: sequence-independent structure alignment of monomer proteins by\r\n * TM-score superposition. Please report i"
},
{
"path": "analysis/TMscore.cpp",
"chars": 297640,
"preview": "/* TM-score: superposition of two protein structures by assuming\r\n * correspondence between residues with the same resid"
},
{
"path": "analysis/cal_plddt_dir.py",
"chars": 9160,
"preview": "# Copyright (c) 2023 Meta Platforms, Inc. and affiliates\n# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPD"
},
{
"path": "analysis/cal_tmscore.py",
"chars": 4693,
"preview": "import argparse\nimport itertools\nimport multiprocessing as mp\nimport os\nimport re\nimport subprocess\nfrom glob import glo"
},
{
"path": "analysis/motif_analysis.ipynb",
"chars": 15900,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": "
},
{
"path": "analysis/plddt_calculate.sh",
"chars": 535,
"preview": "# Example:\n# bash analysis/plddt_calculate.sh generation-results/dplm_650m\n\noutput_dir=$1\noutput_filename_list=$(ls ${ou"
},
{
"path": "analysis/plot.ipynb",
"chars": 417961,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"markdown\",\n \"metadata\": {},\n \"source\": [\n \"# Unconditional\"\n ]\n },\n {\n \""
},
{
"path": "analysis/uncond_analysis.ipynb",
"chars": 96654,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": 1,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": [\n "
},
{
"path": "configs/callbacks/default.yaml",
"chars": 1338,
"preview": "model_summary:\n _target_: pytorch_lightning.callbacks.RichModelSummary\n max_depth: 2\n\n# rich_progress_bar:\n# _target"
},
{
"path": "configs/callbacks/fixedbb.yaml",
"chars": 1584,
"preview": "defaults:\n - default.yaml\n\nmodel_checkpoint:\n _target_: byprot.utils.callbacks.ModelCheckpoint\n monitor: \"val/acc_med"
},
{
"path": "configs/callbacks/lm.yaml",
"chars": 1029,
"preview": "defaults:\n - default.yaml\n\nmodel_checkpoint:\n _target_: byprot.utils.callbacks.ModelCheckpoint\n monitor: \"val/loss\" #"
},
{
"path": "configs/callbacks/structok.yaml",
"chars": 1258,
"preview": "model_summary:\n _target_: pytorch_lightning.callbacks.RichModelSummary\n max_depth: 2\n\n# rich_progress_bar:\n# _target"
},
{
"path": "configs/config.yaml",
"chars": 1815,
"preview": "# @package _global_\n\n# specify here default training configuration\ndefaults:\n - _self_\n - callbacks: # pytorch-lightni"
},
{
"path": "configs/datamodule/cath_4.3.yaml",
"chars": 521,
"preview": "_target_: cath\n\n# data_dir: ${data_dir} # data_dir is specified in config.yaml\n#data_dir: '/root/research/data/protein/c"
},
{
"path": "configs/datamodule/pdb.yaml",
"chars": 754,
"preview": "_target_: pdb\n\n# data_dir: ${data_dir} # data_dir is specified in config.yaml\n# CSV for path and metadata to training ex"
},
{
"path": "configs/datamodule/tokenized_protein.yaml",
"chars": 302,
"preview": "_target_: tokenized_protein\n\ndata_dir: ${paths.data_dir}\n# dataloader related\nmax_tokens: 6000\nmax_len: 1022\nnum_workers"
},
{
"path": "configs/datamodule/uniref50.yaml",
"chars": 187,
"preview": "_target_: uniref50\n\n# data_dir: ${data_dir} # data_dir is specified in config.yaml\ndata_dir: ${paths.data_dir}/uniref50\n"
},
{
"path": "configs/datamodule/uniref50_hf.yaml",
"chars": 193,
"preview": "_target_: uniref50_hf\n\n# data_dir: ${data_dir} # data_dir is specified in config.yaml\ndata_dir: ${paths.data_dir}/uniref"
},
{
"path": "configs/experiment/base.yaml",
"chars": 367,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/cond_dplm_150m.yaml",
"chars": 2072,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/cond_dplm_3b.yaml",
"chars": 2070,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/cond_dplm_650m.yaml",
"chars": 2072,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_150m.yaml",
"chars": 1972,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_150m_ds.yaml",
"chars": 1988,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_150m_stage2.yaml",
"chars": 2055,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_15b_ds.yaml",
"chars": 1976,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_30b_ds.yaml",
"chars": 1988,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_3b.yaml",
"chars": 1966,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_3b_ds.yaml",
"chars": 1982,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_3b_stage2.yaml",
"chars": 2047,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_650m.yaml",
"chars": 1972,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_650m_ds.yaml",
"chars": 1988,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/dplm_650m_stage2.yaml",
"chars": 2055,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm/mlm_150m.yaml",
"chars": 1597,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm2/dplm2_150m.yaml",
"chars": 2476,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm2/dplm2_3b.yaml",
"chars": 2468,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm2/dplm2_650m.yaml",
"chars": 2448,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm2/dplm2_650m_selfmixup.yaml",
"chars": 2512,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/dplm2/dplm2_bit_650m.yaml",
"chars": 2637,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/experiment/structok/inference/forward_folding.yaml",
"chars": 2395,
"preview": "# This config is highly inspired by the MultiFlow (https://github.com/jasonkyuyim/multiflow) repo.\n\nenv:\n PROJECT_ROOT:"
},
{
"path": "configs/experiment/structok/inference/inverse_folding.yaml",
"chars": 2666,
"preview": "# This config is highly inspired by the MultiFlow (https://github.com/jasonkyuyim/multiflow) repo.\n\nenv:\n PROJECT_ROOT:"
},
{
"path": "configs/experiment/structok/inference/reconstruction.yaml",
"chars": 2493,
"preview": "# This config is highly inspired by the MultiFlow (https://github.com/jasonkyuyim/multiflow) repo.\n\nenv:\n PROJECT_ROOT:"
},
{
"path": "configs/experiment/structok/inference/unconditional.yaml",
"chars": 1171,
"preview": "# This config is highly inspired by the MultiFlow (https://github.com/jasonkyuyim/multiflow) repo.\n\nenv:\n PROJECT_ROOT:"
},
{
"path": "configs/experiment/structok/inference/unconditional_codesign.yaml",
"chars": 1180,
"preview": "# This config is highly inspired by the MultiFlow (https://github.com/jasonkyuyim/multiflow) repo.\n\nenv:\n PROJECT_ROOT:"
},
{
"path": "configs/experiment/structok/structok_lfq_8k_pdb_swissprot_c512.yaml",
"chars": 2880,
"preview": "# @package _global_\n\n# to execute this experiment run:\n# python train.py experiment=example\n\ndefaults:\n - /datamodule: "
},
{
"path": "configs/hydra/default.yaml",
"chars": 311,
"preview": "# https://hydra.cc/docs/configure_hydra/intro/\n\n# enable color logging\ndefaults:\n - override hydra_logging: colorlog\n "
},
{
"path": "configs/logger/tensorboard.yaml",
"chars": 241,
"preview": "# https://www.tensorflow.org/tensorboard/\n\ntensorboard:\n _target_: pytorch_lightning.loggers.tensorboard.TensorBoardLog"
},
{
"path": "configs/logger/wandb.yaml",
"chars": 424,
"preview": "# https://wandb.ai\n\nwandb:\n _target_: byprot.utils.logger.ByProtWandbLogger # lightning.pytorch.loggers.wandb.WandbLogg"
},
{
"path": "configs/paths/default.yaml",
"chars": 667,
"preview": "# path to root directory\n# this requires PROJECT_ROOT environment variable to exist\n# PROJECT_ROOT is inferred and set b"
},
{
"path": "configs/test.yaml",
"chars": 320,
"preview": "# @package _global_\n\n# specify here default evaluation configuration\ndefaults:\n - _self_\n - config\n\nexperiment_path: ?"
},
{
"path": "configs/trainer/ddp.yaml",
"chars": 157,
"preview": "defaults:\n - default.yaml\n\naccelerator: \"cuda\"\ndevices: \"auto\"\nstrategy: ddp_find_unused_parameters_true # ddp_sharded\n"
},
{
"path": "configs/trainer/ddp_bf16.yaml",
"chars": 42,
"preview": "defaults:\n - ddp.yaml\n\nprecision: \"bf16\"\n"
},
{
"path": "configs/trainer/ddp_fp16.yaml",
"chars": 38,
"preview": "defaults:\n - ddp.yaml\n\nprecision: 16\n"
},
{
"path": "configs/trainer/deepspeed_zero2.yaml",
"chars": 131,
"preview": "defaults:\n - default.yaml\n\naccelerator: \"cuda\"\ndevices: \"auto\"\nstrategy: \"deepspeed_stage_2\"\nprecision: 16\n# sync_batch"
},
{
"path": "configs/trainer/deepspeed_zero2_bf16.yaml",
"chars": 77,
"preview": "defaults:\n - deepspeed_zero2.yaml\n\nprecision: \"bf16\"\n# sync_batchnorm: True\n"
},
{
"path": "configs/trainer/deepspeed_zero2_fp16.yaml",
"chars": 73,
"preview": "defaults:\n - deepspeed_zero2.yaml\n\nprecision: 16\n# sync_batchnorm: True\n"
},
{
"path": "configs/trainer/deepspeed_zero2_offload.yaml",
"chars": 139,
"preview": "defaults:\n - default.yaml\n\naccelerator: \"cuda\"\ndevices: \"auto\"\nstrategy: \"deepspeed_stage_2_offload\"\nprecision: 16\n# sy"
},
{
"path": "configs/trainer/deepspeed_zero3.yaml",
"chars": 131,
"preview": "defaults:\n - default.yaml\n\naccelerator: \"cuda\"\ndevices: \"auto\"\nstrategy: \"deepspeed_stage_3\"\nprecision: 16\n# sync_batch"
},
{
"path": "configs/trainer/deepspeed_zero3_bf16.yaml",
"chars": 77,
"preview": "defaults:\n - deepspeed_zero3.yaml\n\nprecision: \"bf16\"\n# sync_batchnorm: True\n"
},
{
"path": "configs/trainer/default.yaml",
"chars": 379,
"preview": "_target_: pytorch_lightning.Trainer\n\naccelerator: \"gpu\"\ndevices: \"auto\"\n\nmin_epochs: 1\nmax_epochs: 10\nenable_progress_ba"
},
{
"path": "env.yml",
"chars": 6144,
"preview": "name: ByProt\nchannels:\n - defaults\ndependencies:\n - _libgcc_mutex=0.1=main\n - _openmp_mutex=5.1=1_gnu\n - ca-certific"
},
{
"path": "generate_dplm.py",
"chars": 4956,
"preview": "import argparse\nimport os\nfrom pprint import pprint\n\nimport torch\n\nfrom byprot import utils\nfrom byprot.models.dplm.dplm"
},
{
"path": "generate_dplm2.py",
"chars": 16859,
"preview": "import argparse\nimport os\n\nimport torch\nimport tree\nfrom Bio import SeqIO\nfrom peft.peft_model import PeftModel\nfrom tqd"
},
{
"path": "requirements.txt",
"chars": 1120,
"preview": "# --------- pytorch --------- #\npytorch_lightning==2.2.0\nlightning==2.2.0\ntorchmetrics\ntorch_geometric\n# torch_scatter\nt"
},
{
"path": "run/scaffold_generate_dplm.py",
"chars": 4633,
"preview": "# The get_motif function of this code is highly motivated by EvoDiff:\n# https://github.com/microsoft/evodiff\n\nimport arg"
},
{
"path": "run/scaffold_generate_dplm2.py",
"chars": 6816,
"preview": "import argparse\nimport os\nfrom pprint import pprint\n\nimport biotite.sequence.io.fasta as fasta\nimport numpy as np\nimport"
},
{
"path": "scripts/download_cath.sh",
"chars": 403,
"preview": "mkdir -p data-bin\nwget -r -nd -np http://people.csail.mit.edu/ingraham/graph-protein-design/data/cath/ -P data-bin/cath_"
},
{
"path": "scripts/download_metadata.sh",
"chars": 170,
"preview": "wget -O dplm2_metadata.tar.gz https://zenodo.org/records/15424801/files/dplm2_metadata.tar.gz?download=1\nmkdir -p data-b"
},
{
"path": "scripts/download_motif_scaffolds.sh",
"chars": 193,
"preview": "wget -O motif_scaffolding_pdbs.tar.gz https://zenodo.org/records/15424801/files/motif_scaffolding_pdbs.tar.gz?download=1"
},
{
"path": "scripts/download_pdb_swissprot_hf.sh",
"chars": 224,
"preview": "pip install huggingface_hub\nmkdir -p data-bin\n# download DPLM-2 training set (PDB and SwissProt) from huggingface hub\nhu"
},
{
"path": "scripts/download_uniref50_hf.sh",
"chars": 194,
"preview": "pip install huggingface_hub\nmkdir -p data-bin\n# download uniref50 dataset from huggingface hub\nhuggingface-cli download "
},
{
"path": "scripts/install.sh",
"chars": 164,
"preview": "pip install torch==2.2.0 torchvision==0.17.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cu121\n\npip i"
},
{
"path": "setup.cfg",
"chars": 617,
"preview": "[isort]\nline_length = 99\nprofile = black\nfilter_files = True\n\n\n[flake8]\nmax_line_length = 99\nshow_source = True\nformat ="
},
{
"path": "setup.py",
"chars": 532,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom setuptools import"
},
{
"path": "src/byprot/__init__.py",
"chars": 185,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport byprot.datamodu"
},
{
"path": "src/byprot/datamodules/__init__.py",
"chars": 442,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport glob\nimport imp"
},
{
"path": "src/byprot/datamodules/cath_datamodule.py",
"chars": 5143,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom functools import "
},
{
"path": "src/byprot/datamodules/dataset/__init__.py",
"chars": 178,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\n# from .datapipe impor"
},
{
"path": "src/byprot/datamodules/dataset/cath.py",
"chars": 15635,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport json\nimport os\n"
},
{
"path": "src/byprot/datamodules/dataset/data_utils.py",
"chars": 15155,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport heapq\nfrom typi"
},
{
"path": "src/byprot/datamodules/dataset/tokenized_protein.py",
"chars": 20605,
"preview": "import imp\nimport math\nimport os\nfrom typing import Iterable, Sequence, TypeVar\n\nimport datasets\nimport numpy as np\nimpo"
},
{
"path": "src/byprot/datamodules/dataset/uniref.py",
"chars": 10030,
"preview": "# Copyright (c) 2023 Microsoft Corporation\n# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Iden"
},
{
"path": "src/byprot/datamodules/dataset/uniref_hf.py",
"chars": 9892,
"preview": "import json\nimport math\nimport os\nimport pickle as pkl\nfrom typing import Iterable, Sequence, TypeVar, Union\n\nimport num"
},
{
"path": "src/byprot/datamodules/pdb_dataset/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "src/byprot/datamodules/pdb_dataset/all_atom.py",
"chars": 13101,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "src/byprot/datamodules/pdb_dataset/pdb_datamodule.py",
"chars": 25877,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\"\"\"PDB data loader.\"\"\"\n"
},
{
"path": "src/byprot/datamodules/pdb_dataset/protein.py",
"chars": 11063,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "src/byprot/datamodules/pdb_dataset/residue_constants.py",
"chars": 38614,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "src/byprot/datamodules/pdb_dataset/utils.py",
"chars": 28372,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n#\n# This file has been m"
},
{
"path": "src/byprot/datamodules/tokenized_protein_datamodule.py",
"chars": 5698,
"preview": "import os\nfrom functools import partial\nfrom typing import (\n Any,\n Callable,\n Dict,\n List,\n Optional,\n "
},
{
"path": "src/byprot/datamodules/uniref50.py",
"chars": 4319,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom functools import "
},
{
"path": "src/byprot/datamodules/uniref50_hf.py",
"chars": 3425,
"preview": "from functools import partial\nfrom typing import (\n Any,\n Callable,\n Dict,\n List,\n Optional,\n Sequence"
},
{
"path": "src/byprot/models/__init__.py",
"chars": 545,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport glob\nimport imp"
},
{
"path": "src/byprot/models/dplm/__init__.py",
"chars": 234,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom .dplm import Diff"
},
{
"path": "src/byprot/models/dplm/dplm.py",
"chars": 21979,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport math\nimport os\n"
},
{
"path": "src/byprot/models/dplm/dplm_invfold.py",
"chars": 17327,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport math\nfrom datac"
},
{
"path": "src/byprot/models/dplm/modules/dplm_adapter.py",
"chars": 10382,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom copy import deepc"
},
{
"path": "src/byprot/models/dplm/modules/dplm_modeling_esm.py",
"chars": 18145,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom typing import Lis"
},
{
"path": "src/byprot/models/dplm/modules/gvp_transformer_encoder.py",
"chars": 1614,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport esm\nimport torc"
},
{
"path": "src/byprot/models/dplm2/__init__.py",
"chars": 300,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom .dplm2 import Mul"
},
{
"path": "src/byprot/models/dplm2/dplm2.py",
"chars": 30686,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport math\nimport os\n"
},
{
"path": "src/byprot/models/dplm2/dplm2_bit.py",
"chars": 17187,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport math\nimport os\n"
},
{
"path": "src/byprot/models/dplm2/modules/dplm2_bit_modeling_esm.py",
"chars": 27584,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport math\nimport os\n"
},
{
"path": "src/byprot/models/dplm2/modules/dplm2_modeling_esm.py",
"chars": 31477,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport math\nimport os\n"
},
{
"path": "src/byprot/models/structok/modules/ema.py",
"chars": 3388,
"preview": "# Original file was released under CreativeML Open RAIL-M, with the full license text\n# available at https://github.com/"
},
{
"path": "src/byprot/models/structok/modules/folding_utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "src/byprot/models/structok/modules/folding_utils/categorical_mixture.py",
"chars": 1467,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\n# Copyright (c) Meta P"
},
{
"path": "src/byprot/models/structok/modules/folding_utils/decoder.py",
"chars": 16065,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n# This file has been mod"
},
{
"path": "src/byprot/models/structok/modules/folding_utils/esmfold.py",
"chars": 13948,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\n# Copyright (c) Meta P"
},
{
"path": "src/byprot/models/structok/modules/folding_utils/misc.py",
"chars": 11323,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n# This file has been mod"
},
{
"path": "src/byprot/models/structok/modules/folding_utils/pretrained.py",
"chars": 6619,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\n# Copyright (c) Meta P"
},
{
"path": "src/byprot/models/structok/modules/folding_utils/structure_module.py",
"chars": 25778,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n# This file has been mod"
},
{
"path": "src/byprot/models/structok/modules/folding_utils/tri_self_attn_block.py",
"chars": 6529,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\n# Copyright (c) Meta P"
},
{
"path": "src/byprot/models/structok/modules/folding_utils/trunk.py",
"chars": 10393,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\n# Copyright (c) Meta P"
},
{
"path": "src/byprot/models/structok/modules/gvp_encoder.py",
"chars": 3489,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport esm\nimport torc"
},
{
"path": "src/byprot/models/structok/modules/lfq.py",
"chars": 12298,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n#\n# Original file was re"
},
{
"path": "src/byprot/models/structok/modules/loss.py",
"chars": 63622,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n#\n# This file has been m"
},
{
"path": "src/byprot/models/structok/modules/nn.py",
"chars": 2910,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport torch\nfrom esm."
},
{
"path": "src/byprot/models/structok/modules/vqvae.py",
"chars": 20343,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n#\n# This file has been m"
},
{
"path": "src/byprot/models/structok/structok_lfq.py",
"chars": 10243,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport os\n\nimport torc"
},
{
"path": "src/byprot/models/utils.py",
"chars": 14954,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport importlib\nimpor"
},
{
"path": "src/byprot/modules/__init__.py",
"chars": 1421,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport torch\nfrom torc"
},
{
"path": "src/byprot/modules/cross_entropy.py",
"chars": 13805,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport torch\nfrom torc"
},
{
"path": "src/byprot/modules/metrics.py",
"chars": 1883,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport math\nfrom funct"
},
{
"path": "src/byprot/modules/protein_metrics.py",
"chars": 6464,
"preview": "# Copyright (c) 2022 Jason Yim, Brian L Trippe, Valentin De Bortoli, Emile Mathieu\n# Copyright (c) 2024 Bytedance Ltd. a"
},
{
"path": "src/byprot/tasks/__init__.py",
"chars": 9148,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport copy\nimport glo"
},
{
"path": "src/byprot/tasks/lm/dplm.py",
"chars": 6682,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom typing import Any"
},
{
"path": "src/byprot/tasks/lm/dplm2.py",
"chars": 10489,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom typing import Any"
},
{
"path": "src/byprot/tasks/lm/dplm_invfold.py",
"chars": 21297,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport copy\nimport os\n"
},
{
"path": "src/byprot/tasks/lm/mlm.py",
"chars": 10302,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport copy\nimport os\n"
},
{
"path": "src/byprot/tasks/struct_tokenizer/structok.py",
"chars": 11947,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom contextlib import"
},
{
"path": "src/byprot/testing_pipeline.py",
"chars": 2376,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport os\nfrom typing "
},
{
"path": "src/byprot/training_pipeline.py",
"chars": 3710,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport os\nfrom typing "
},
{
"path": "src/byprot/utils/__init__.py",
"chars": 12687,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport glob\nimport imp"
},
{
"path": "src/byprot/utils/callbacks.py",
"chars": 11693,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport importlib\nimpor"
},
{
"path": "src/byprot/utils/config.py",
"chars": 4580,
"preview": "import importlib\nimport logging\nimport os\nfrom contextlib import contextmanager\nfrom copy import deepcopy\nfrom pathlib i"
},
{
"path": "src/byprot/utils/io.py",
"chars": 4379,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\n# Copyright (c) Facebo"
},
{
"path": "src/byprot/utils/logger.py",
"chars": 2979,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom pathlib import Pa"
},
{
"path": "src/byprot/utils/lr_scheduler.py",
"chars": 5048,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport torch\nfrom torc"
},
{
"path": "src/byprot/utils/optim.py",
"chars": 4238,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport torch\nfrom torc"
},
{
"path": "src/byprot/utils/protein/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "src/byprot/utils/protein/all_atom.py",
"chars": 13101,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "src/byprot/utils/protein/evaluator_dplm2.py",
"chars": 44987,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n#\n# This file has been m"
},
{
"path": "src/byprot/utils/protein/folding_model.py",
"chars": 8897,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n#\n# This file has been m"
},
{
"path": "src/byprot/utils/protein/residue_constants.py",
"chars": 38614,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "src/byprot/utils/protein/tokenize_pdb.py",
"chars": 3130,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nimport argparse\nimport"
},
{
"path": "src/byprot/utils/protein/utils.py",
"chars": 30364,
"preview": "# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n#\n# This file has been m"
},
{
"path": "src/byprot/utils/registry.py",
"chars": 912,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\nfrom byprot.datamodule"
},
{
"path": "src/byprot/utils/scaffold_utils.py",
"chars": 19234,
"preview": "import os\nimport random\nfrom copy import deepcopy\nfrom pprint import pprint\n\nimport esm\nimport esm.inverse_folding\nimpor"
},
{
"path": "src/byprot/utils/strategies.py",
"chars": 2698,
"preview": "import logging\nfrom typing import Dict, List, Union\n\nimport torch\nfrom lightning_fabric.strategies.fsdp import (\n _ha"
},
{
"path": "test.py",
"chars": 1018,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\n#!python\n\nimport pyroo"
},
{
"path": "train.py",
"chars": 1997,
"preview": "# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates\n# SPDX-License-Identifier: Apache-2.0\n\n\n#!python\n\nimport pyroo"
},
{
"path": "vendor/openfold/CITATION.cff",
"chars": 3257,
"preview": "cff-version: 1.2.0\npreferred-citation:\n authors:\n - family-names: \"Ahdritz\"\n given-names: \"Gustaf\"\n orci"
},
{
"path": "vendor/openfold/Dockerfile",
"chars": 1878,
"preview": "FROM nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04\n\n# metainformation\nLABEL org.opencontainers.image.version = \"1.0.0\"\nLAB"
},
{
"path": "vendor/openfold/LICENSE",
"chars": 11358,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "vendor/openfold/README.md",
"chars": 25424,
"preview": "\n_Figure: Comparison of OpenFold and AlphaFold2 predictions to the experimental structure "
},
{
"path": "vendor/openfold/deepspeed_config.json",
"chars": 424,
"preview": "{\n \"fp16\": {\n \"enabled\": false,\n \"min_loss_scale\": 1\n },\n \"amp\": {\n \"enabled\": false,\n \"opt_level\": \"O2\"\n"
},
{
"path": "vendor/openfold/environment.yml",
"chars": 729,
"preview": "name: openfold-venv\nchannels:\n - conda-forge\n - bioconda\n - pytorch\ndependencies:\n - python=3.9\n - libgcc=7.2\n - s"
},
{
"path": "vendor/openfold/notebooks/OpenFold.ipynb",
"chars": 39181,
"preview": "{\n \"nbformat\": 4,\n \"nbformat_minor\": 0,\n \"metadata\": {\n \"accelerator\": \"GPU\",\n \"colab\": {\n \"name\": \"OpenFo"
},
{
"path": "vendor/openfold/notebooks/environment.yml",
"chars": 382,
"preview": "name: openfold_venv\nchannels:\n - conda-forge\n - bioconda\ndependencies:\n - conda-forge::openmm=7.5.1\n - conda-forge::"
},
{
"path": "vendor/openfold/openfold/__init__.py",
"chars": 138,
"preview": "from . import model\nfrom . import utils\nfrom . import np\nfrom . import resources\n\n__all__ = [\"model\", \"utils\", \"np\", \"da"
},
{
"path": "vendor/openfold/openfold/config.py",
"chars": 25953,
"preview": "import copy\nimport importlib\nimport ml_collections as mlc\n\n\ndef set_inf(c, inf):\n for k, v in c.items():\n if i"
},
{
"path": "vendor/openfold/openfold/data/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "vendor/openfold/openfold/data/data_modules.py",
"chars": 27792,
"preview": "import copy\nfrom functools import partial\nimport json\nimport logging\nimport os\nimport pickle\nfrom typing import Optional"
},
{
"path": "vendor/openfold/openfold/data/data_pipeline.py",
"chars": 32364,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/data_transforms.py",
"chars": 38848,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/errors.py",
"chars": 872,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/feature_pipeline.py",
"chars": 4043,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/input_pipeline.py",
"chars": 6298,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/mmcif_parsing.py",
"chars": 17658,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/parsers.py",
"chars": 14254,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/templates.py",
"chars": 43966,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/tools/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "vendor/openfold/openfold/data/tools/hhblits.py",
"chars": 6255,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/tools/hhsearch.py",
"chars": 3673,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/tools/jackhmmer.py",
"chars": 8402,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/tools/kalign.py",
"chars": 3845,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/data/tools/utils.py",
"chars": 1430,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "vendor/openfold/openfold/model/dropout.py",
"chars": 2222,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not"
},
{
"path": "vendor/openfold/openfold/model/embedders.py",
"chars": 12547,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/evoformer.py",
"chars": 31113,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/heads.py",
"chars": 6875,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/model.py",
"chars": 19442,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/msa.py",
"chars": 13572,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/outer_product_mean.py",
"chars": 4669,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/pair_transition.py",
"chars": 2798,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/primitives.py",
"chars": 21314,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/structure_module.py",
"chars": 24587,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/template.py",
"chars": 18812,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/torchscript.py",
"chars": 6285,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not"
},
{
"path": "vendor/openfold/openfold/model/triangular_attention.py",
"chars": 4500,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/model/triangular_multiplicative_update.py",
"chars": 17341,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/np/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "vendor/openfold/openfold/np/protein.py",
"chars": 20286,
"preview": "# Copyright 2021 AlQuraishi Laboratory\n# Copyright 2021 DeepMind Technologies Limited\n#\n# Licensed under the Apache Lice"
},
{
"path": "vendor/openfold/openfold/np/relax/__init__.py",
"chars": 0,
"preview": ""
}
]
// ... and 119 more files (download for full content)
About this extraction
This page contains the full source code of the bytedance/dplm GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 319 files (12.3 MB), approximately 3.2M tokens, and a symbol index with 1958 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.