Showing preview only (288K chars total). Download the full file or copy to clipboard to get everything.
Repository: DevinKreuzer/SAN
Branch: main
Commit: 6e38329957af
Files: 153
Total size: 251.5 KB
Directory structure:
gitextract_q068fqi7/
├── .gitignore
├── LICENSE
├── README.md
├── configs/
│ ├── CLUSTER/
│ │ ├── ablation/
│ │ │ ├── full/
│ │ │ │ ├── 1e-1/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-2/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-3/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-4/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-5/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-6/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-7/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-8/
│ │ │ │ │ └── node
│ │ │ │ └── none
│ │ │ └── sparse/
│ │ │ ├── node
│ │ │ └── none
│ │ └── optimized
│ ├── MOLHIV/
│ │ ├── ablation/
│ │ │ ├── full/
│ │ │ │ ├── 1e-3/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-4/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-5/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-6/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-7/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-8/
│ │ │ │ │ └── node
│ │ │ │ └── none
│ │ │ └── sparse/
│ │ │ ├── node
│ │ │ └── none
│ │ └── optimized
│ ├── MOLPCBA/
│ │ ├── ablation/
│ │ │ ├── full/
│ │ │ │ ├── 1e-3/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-4/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-5/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-6/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-7/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-8/
│ │ │ │ │ └── node
│ │ │ │ └── none
│ │ │ └── sparse/
│ │ │ ├── node
│ │ │ └── none
│ │ └── optimized
│ ├── PATTERN/
│ │ ├── ablation/
│ │ │ ├── full/
│ │ │ │ ├── 1e-1/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-2/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-3/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-4/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-5/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-6/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-7/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-8/
│ │ │ │ │ └── node
│ │ │ │ └── none
│ │ │ └── sparse/
│ │ │ ├── node
│ │ │ └── none
│ │ └── optimized
│ └── ZINC/
│ ├── ablation/
│ │ ├── full/
│ │ │ ├── 1e-2/
│ │ │ │ └── node
│ │ │ ├── 1e-3/
│ │ │ │ └── node
│ │ │ ├── 1e-4/
│ │ │ │ └── node
│ │ │ ├── 1e-5/
│ │ │ │ └── node
│ │ │ ├── 1e-6/
│ │ │ │ └── node
│ │ │ ├── 1e-7/
│ │ │ │ └── node
│ │ │ ├── 1e-8/
│ │ │ │ └── node
│ │ │ └── none
│ │ └── sparse/
│ │ ├── node
│ │ └── none
│ └── optimized
├── data/
│ ├── SBMs.py
│ ├── data.py
│ ├── molecules.py
│ ├── molhiv.py
│ ├── molpcba.py
│ ├── script_download_SBMs.sh
│ ├── script_download_all_datasets.sh
│ └── script_download_molecules.sh
├── layers/
│ ├── graph_transformer_layer.py
│ └── mlp_readout_layer.py
├── main_SBMs_node_classification.py
├── main_ZINC_graph_regression.py
├── main_molhiv.py
├── main_molpcba.py
├── misc/
│ ├── download_datasets.md
│ └── env_installation.md
├── nets/
│ ├── SBMs_node_classification/
│ │ ├── SAN.py
│ │ ├── SAN_EdgeLPE.py
│ │ ├── SAN_NodeLPE.py
│ │ └── load_net.py
│ ├── ZINC_graph_regression/
│ │ ├── SAN.py
│ │ ├── SAN_EdgeLPE.py
│ │ ├── SAN_NodeLPE.py
│ │ └── load_net.py
│ ├── molhiv_graph_regression/
│ │ ├── SAN.py
│ │ ├── SAN_EdgeLPE.py
│ │ ├── SAN_NodeLPE.py
│ │ └── load_net.py
│ └── molpcba/
│ ├── SAN.py
│ ├── SAN_EdgeLPE.py
│ ├── SAN_NodeLPE.py
│ └── load_net.py
├── requirements.txt
├── scripts/
│ ├── CLUSTER/
│ │ ├── ablation/
│ │ │ ├── full_node_1e-1
│ │ │ ├── full_node_1e-2
│ │ │ ├── full_node_1e-3
│ │ │ ├── full_node_1e-4
│ │ │ ├── full_node_1e-5
│ │ │ ├── full_node_1e-6
│ │ │ ├── full_node_1e-7
│ │ │ ├── full_node_1e-8
│ │ │ ├── full_none
│ │ │ ├── sparse_node
│ │ │ └── sparse_none
│ │ └── optimized/
│ │ └── cluster_optimized
│ ├── MOLHIV/
│ │ ├── ablation/
│ │ │ ├── full_node_1e-3
│ │ │ ├── full_node_1e-4
│ │ │ ├── full_node_1e-5
│ │ │ ├── full_node_1e-6
│ │ │ ├── full_node_1e-7
│ │ │ ├── full_node_1e-8
│ │ │ ├── full_none
│ │ │ ├── sparse_node
│ │ │ └── sparse_none
│ │ └── optimized/
│ │ └── molhiv_optimized
│ ├── MOLPCBA/
│ │ ├── ablation/
│ │ │ ├── full_node_1e-3
│ │ │ ├── full_node_1e-4
│ │ │ ├── full_node_1e-5
│ │ │ ├── full_node_1e-6
│ │ │ ├── full_node_1e-7
│ │ │ ├── full_node_1e-8
│ │ │ ├── full_none
│ │ │ ├── sparse_node
│ │ │ └── sparse_none
│ │ └── optimized/
│ │ └── molpcba_optimized
│ ├── PATTERN/
│ │ ├── ablation/
│ │ │ ├── full_node_1e-1
│ │ │ ├── full_node_1e-2
│ │ │ ├── full_node_1e-3
│ │ │ ├── full_node_1e-4
│ │ │ ├── full_node_1e-5
│ │ │ ├── full_node_1e-6
│ │ │ ├── full_node_1e-7
│ │ │ ├── full_node_1e-8
│ │ │ ├── full_none
│ │ │ ├── sparse_node
│ │ │ └── sparse_none
│ │ └── optimized/
│ │ └── pattern_optimized
│ ├── ZINC/
│ │ ├── ablation/
│ │ │ ├── full_node_1e-2
│ │ │ ├── full_node_1e-3
│ │ │ ├── full_node_1e-4
│ │ │ ├── full_node_1e-5
│ │ │ ├── full_node_1e-6
│ │ │ ├── full_node_1e-7
│ │ │ ├── full_node_1e-8
│ │ │ ├── full_none
│ │ │ ├── sparse_node
│ │ │ └── sparse_none
│ │ └── optimized/
│ │ └── zinc_optimized
│ └── reproduce.md
└── train/
├── MetricWrapper.py
├── metrics.py
├── train_SBMs_node_classification.py
├── train_ZINC_graph_regression.py
├── train_molhiv.py
└── train_molpcba.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Custom GitIgnore
*.code-workspace
*.out
logs/
out/
dataset/
.vscode/
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2020 Vijay Prakash Dwivedi, Xavier Bresson
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# SAN
Implementation of Spectral Attention Networks, a powerful GNN that leverages key principles from spectral graph theory to enable full graph attention.

# Overview
* ```nets``` contains the node-LPE, edge-LPE and no-LPE architectures, implemented with PyTorch.
* ```layers``` contains the multi-headed attention employed by the Main Graph Transformer implemented in DGL.
* ```train``` contains methods to train the models.
* ```data``` contains dataset classes and various methods used in precomputation.
* ```configs``` contains the various parameters used in the ablation and SOTA comparison studies.
* ```misc``` contains scripts from https://github.com/graphdeeplearning/graphtransformer to download datasets and set up environments.
* ```scripts``` contains scripts to reproduce ablation and SOTA comparison results. See ```scripts/reproduce.md``` for details.
================================================
FILE: configs/CLUSTER/ablation/full/1e-1/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/full/1e-1/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-1,
"m": 10,
"LPE": "node",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 48,
"GT_out_dim": 48,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/ablation/full/1e-2/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/full/1e-2/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-2,
"m": 10,
"LPE": "node",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 48,
"GT_out_dim": 48,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/ablation/full/1e-3/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/full/1e-3/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-3,
"m": 10,
"LPE": "node",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 48,
"GT_out_dim": 48,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/ablation/full/1e-4/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/full/1e-4/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-4,
"m": 10,
"LPE": "node",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 48,
"GT_out_dim": 48,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/ablation/full/1e-5/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/full/1e-5/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-5,
"m": 10,
"LPE": "node",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 48,
"GT_out_dim": 48,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/ablation/full/1e-6/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/full/1e-6/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-6,
"m": 10,
"LPE": "node",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 48,
"GT_out_dim": 48,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/ablation/full/1e-7/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/full/1e-7/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-7,
"m": 10,
"LPE": "node",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 48,
"GT_out_dim": 48,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/ablation/full/1e-8/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/full/1e-8/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-8,
"m": 10,
"LPE": "node",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 48,
"GT_out_dim": 48,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/ablation/full/none
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/full/none",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-1,
"m": 10,
"LPE": "none",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 48,
"GT_out_dim": 48,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/ablation/sparse/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/sparse/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": false,
"gamma": 1e-1,
"m": 10,
"LPE": "node",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 56,
"GT_out_dim": 56,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/ablation/sparse/none
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/sparse/none",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": false,
"gamma": 1e-1,
"m": 10,
"LPE": "none",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 56,
"GT_out_dim": 56,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/CLUSTER/optimized
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_CLUSTER",
"out_dir": "out/SBM_CLUSTER/full/1e-1/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-5,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-1,
"m": 10,
"LPE": "node",
"LPE_layers": 1,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 16,
"GT_hidden_dim": 48,
"GT_out_dim": 48,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLHIV/ablation/full/1e-3/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLHIV/full/1e-3/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-3,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLHIV/ablation/full/1e-4/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLHIV/full/1e-4/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-4,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLHIV/ablation/full/1e-5/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLHIV/full/1e-5/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-5,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLHIV/ablation/full/1e-6/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLHIV/full/1e-6/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-6,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLHIV/ablation/full/1e-7/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLHIV/full/1e-7/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-7,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLHIV/ablation/full/1e-8/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLHIV/full/1e-8/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-8,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLHIV/ablation/full/none
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLHIV/full/none/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 64,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-6,
"m": 10,
"LPE": "none",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 80,
"GT_out_dim": 80,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLHIV/ablation/sparse/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLHIV/sparse/node",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 64,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": false,
"gamma": 1e-5,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 80,
"GT_out_dim": 80,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLHIV/ablation/sparse/none
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLHIV/sparse/none",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 64,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": false,
"gamma": 1e-5,
"m": 10,
"LPE": "none",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 96,
"GT_out_dim": 96,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLHIV/optimized
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLHIV/full/1e-6/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 64,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-6,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 10,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.01,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLPCBA/ablation/full/1e-3/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLPCBA/full/1e-3/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-3,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLPCBA/ablation/full/1e-4/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLPCBA/full/1e-4/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-4,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLPCBA/ablation/full/1e-5/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLPCBA/full/1e-5/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-5,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLPCBA/ablation/full/1e-6/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLPCBA/full/1e-6/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-6,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLPCBA/ablation/full/1e-7/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLPCBA/full/1e-7/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-7,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLPCBA/ablation/full/1e-8/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLPCBA/full/1e-8/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 128,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-8,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLPCBA/ablation/full/none
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-HIV",
"out_dir": "out/MOLPCBA/full/none/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 64,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-6,
"m": 10,
"LPE": "none",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 80,
"GT_out_dim": 80,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLPCBA/ablation/sparse/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
    "dataset": "MOL-PCBA",
    "out_dir": "out/MOLPCBA/sparse/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 64,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": false,
"gamma": 1e-5,
"m": 10,
"LPE": "node",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 80,
"GT_out_dim": 80,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLPCBA/ablation/sparse/none
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
    "dataset": "MOL-PCBA",
    "out_dir": "out/MOLPCBA/sparse/none/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 64,
"init_lr": 0.0001,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": false,
"gamma": 1e-5,
"m": 10,
"LPE": "none",
"LPE_layers": 2,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 96,
"GT_out_dim": 96,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.03,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/MOLPCBA/optimized
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "MOL-PCBA",
"out_dir": "out/MOLPCBA/full/1e-6/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 512,
"init_lr": 0.0003,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 20,
"min_lr": 1e-5,
"weight_decay": 0,
"print_epoch_interval": 5,
"max_time": 24,
"batch_accumulation": 2
},
"net_params": {
"full_graph": true,
"gamma": 1e-6,
"m": 10,
        "extra_mlp": false,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 5,
"GT_hidden_dim": 304,
"GT_out_dim": 304,
"GT_n_heads": 4,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.2,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/full/1e-1/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/full/1e-1/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-1,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/full/1e-2/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/full/1e-2/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-2,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/full/1e-3/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/full/1e-3/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-3,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/full/1e-4/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/full/1e-4/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-4,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/full/1e-5/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/full/1e-5/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-5,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/full/1e-6/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/full/1e-6/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-6,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/full/1e-7/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/full/1e-7/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-7,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/full/1e-8/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/full/1e-8/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-8,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/full/none
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/full/none/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-1,
"m": 10,
"LPE": "none",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 80,
"GT_out_dim": 80,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/sparse/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/sparse/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": false,
"gamma": 1e-2,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/ablation/sparse/none
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/sparse/none/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": false,
"gamma": 1e-2,
"m": 10,
"LPE": "none",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 96,
"GT_out_dim": 96,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/PATTERN/optimized
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "SBM_PATTERN",
"out_dir": "out/SBM_PATTERN/full/1e-2/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 16,
"init_lr": 0.0005,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 10,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-2,
"m": 10,
"LPE": "node",
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 4,
"GT_hidden_dim": 80,
"GT_out_dim": 80,
"GT_n_heads": 10,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/ablation/full/1e-2/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/full/1e-2/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 32,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-2,
"LPE": "node",
"m": 30,
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/ablation/full/1e-3/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/full/1e-3/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 32,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-3,
"LPE": "node",
"m": 30,
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/ablation/full/1e-4/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/full/1e-4/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 32,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-4,
"LPE": "node",
"m": 30,
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/ablation/full/1e-5/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/full/1e-5/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 32,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-5,
"LPE": "node",
"m": 30,
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/ablation/full/1e-6/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/full/1e-6/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 32,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-6,
"LPE": "node",
"m": 30,
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/ablation/full/1e-7/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/full/1e-7/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 32,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-7,
"LPE": "node",
"m": 30,
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/ablation/full/1e-8/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/full/1e-8/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 32,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-8,
"LPE": "node",
"m": 30,
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 64,
"GT_out_dim": 64,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/ablation/full/none
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/full/none/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 32,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-5,
"LPE": "none",
"m": 30,
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 80,
"GT_out_dim": 80,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/ablation/sparse/node
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/sparse/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 32,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": false,
"gamma": 1e-5,
"LPE": "node",
"m": 30,
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 72,
"GT_out_dim": 72,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/ablation/sparse/none
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/sparse/none/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 64,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": false,
"gamma": 1e-5,
"LPE": "none",
"m": 30,
"LPE_layers": 3,
"LPE_dim": 16,
"LPE_n_heads": 4,
"GT_layers": 6,
"GT_hidden_dim": 96,
"GT_out_dim": 96,
"GT_n_heads": 8,
"residual": true,
"readout": "mean",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: configs/ZINC/optimized
================================================
{
"gpu": {
"use": true,
"id": 0
},
"model": "GraphTransformer",
"dataset": "ZINC",
"out_dir": "out/ZINC/full/1e-5/node/",
"params": {
"seed": 41,
"epochs": 1000,
"batch_size": 32,
"init_lr": 0.0007,
"lr_reduce_factor": 0.5,
"lr_schedule_patience": 25,
"min_lr": 1e-6,
"weight_decay": 0.0,
"print_epoch_interval": 5,
"max_time": 24
},
"net_params": {
"full_graph": true,
"gamma": 1e-5,
"LPE": "node",
"m": 10,
"LPE_layers": 2,
"LPE_dim": 8,
"LPE_n_heads": 4,
"GT_layers": 10,
"GT_hidden_dim": 56,
"GT_out_dim": 56,
"GT_n_heads": 8,
"residual": true,
"readout": "sum",
"in_feat_dropout": 0.0,
"dropout": 0.0,
"layer_norm": false,
"batch_norm": true
}
}
================================================
FILE: data/SBMs.py
================================================
import time
import os
import pickle
import numpy as np
import dgl
import torch
import torch.nn.functional as F
from scipy import sparse as sp
import numpy as np
import networkx as nx
import hashlib
class load_SBMsDataSetDGL(torch.utils.data.Dataset):
    """One split of an SBM benchmark, unpickled from disk and rebuilt as DGL graphs.

    The file ``<data_dir>/<name>_<split>.pkl`` is unpickled and every record
    in it is converted by :meth:`_prepare`. Only the ``node_feat``, ``W`` and
    ``node_label`` attributes of a record are read.
    """

    def __init__(self,
                 data_dir,
                 name,
                 split):
        self.split = split
        self.is_test = split.lower() in ['test', 'val']

        pkl_path = os.path.join(data_dir, name + '_%s.pkl' % self.split)
        # NOTE(review): unpickling executes arbitrary code; only load trusted files.
        with open(pkl_path, 'rb') as f:
            self.dataset = pickle.load(f)

        self.node_labels = []
        self.graph_lists = []
        self.n_samples = len(self.dataset)
        self._prepare()

    def _prepare(self):
        """Convert every unpickled record into a DGLGraph plus its label entry."""
        print("preparing %d graphs for the %s set..." % (self.n_samples, self.split.upper()))

        for record in self.dataset:
            feats = record.node_feat
            # One (src, dst) row per non-zero entry of the adjacency matrix W.
            pairs = (record.W != 0).nonzero()

            graph = dgl.DGLGraph()
            graph.add_nodes(feats.size(0))
            graph.ndata['feat'] = feats.long()
            for u, v in pairs.tolist():
                graph.add_edges(u, v)

            # Every edge gets the same constant feature of width 1.
            width = 1
            graph.edata['feat'] = torch.ones(graph.number_of_edges(), width)

            self.graph_lists.append(graph)
            self.node_labels.append(record.node_label)

    def __len__(self):
        """Return the number of graphs in the dataset."""
        return self.n_samples

    def __getitem__(self, idx):
        """
            Get the idx^th sample.
            Parameters
            ---------
            idx : int
                The sample index.
            Returns
            -------
            (dgl.DGLGraph, label)
                DGLGraph with node features stored in the `feat` field,
                and the `node_label` value of the matching record
                (presumably a per-node label tensor -- confirm against the data).
        """
        graph = self.graph_lists[idx]
        label = self.node_labels[idx]
        return graph, label
class SBMsDatasetDGL(torch.utils.data.Dataset):
    """Container that builds the train / test / val splits of one SBM dataset."""

    def __init__(self, name):
        """
            Build all three splits of dataset `name` from the pickles found
            under `data/SBMs`, and report how long loading took.
        """
        t_begin = time.time()
        print("[I] Loading data ...")
        self.name = name
        data_dir = 'data/SBMs'
        # Splits are constructed in the order train, test, val.
        self.train = load_SBMsDataSetDGL(data_dir, name, split='train')
        self.test = load_SBMsDataSetDGL(data_dir, name, split='test')
        self.val = load_SBMsDataSetDGL(data_dir, name, split='val')
        print("[I] Finished loading.")
        elapsed = time.time() - t_begin
        print("[I] Data load time: {:.4f}s".format(elapsed))
def laplace_decomp(g, max_freqs):
    """Attach Laplacian eigenvectors / eigenvalues to ``g`` as node data.

    Builds ``L = I - N A N`` where ``A`` is the adjacency matrix and ``N`` is
    the diagonal matrix of ``in_degree ** -0.5`` (degrees clipped to >= 1),
    diagonalises it with ``numpy.linalg.eigh`` and keeps the first
    ``max_freqs`` eigenpairs.

    Stored on the graph:
        g.ndata['EigVecs'] : (num_nodes, max_freqs) float tensor, each row
                             L2-normalised, NaN-padded on the right when the
                             graph has fewer than ``max_freqs`` nodes.
        g.ndata['EigVals'] : (num_nodes, max_freqs, 1) tensor holding the same
                             sorted absolute eigenvalues for every node, with
                             the same NaN padding.

    Returns the same graph object ``g``.
    """
    num_nodes = g.number_of_nodes()
    # Positive only when the graph is smaller than the requested number of frequencies.
    missing = max_freqs - num_nodes

    # Laplacian
    adj = g.adjacency_matrix_scipy(return_edge_ids=False).astype(float)
    inv_sqrt_deg = sp.diags(dgl.backend.asnumpy(g.in_degrees()).clip(1) ** -0.5, dtype=float)
    laplacian = sp.eye(num_nodes) - inv_sqrt_deg * adj * inv_sqrt_deg

    # Eigen-decomposition with numpy, truncated to the desired number of frequencies
    all_vals, all_vecs = np.linalg.eigh(laplacian.toarray())
    kept_vals = all_vals[:max_freqs]
    kept_vecs = all_vecs[:, :max_freqs]

    # Row-normalise the eigenvectors
    vecs = F.normalize(torch.from_numpy(kept_vecs).float(), p=2, dim=1, eps=1e-12)
    # Abs value is taken because numpy sometimes computes the first eigenvalue
    # approaching 0 from the negative side
    vals = torch.from_numpy(np.sort(np.abs(np.real(kept_vals))))

    if missing > 0:
        vecs = F.pad(vecs, (0, missing), value=float('nan'))
        vals = F.pad(vals, (0, missing), value=float('nan'))

    g.ndata['EigVecs'] = vecs
    # Every node carries a copy of the full eigenvalue vector
    g.ndata['EigVals'] = vals.unsqueeze(0).repeat(num_nodes, 1).unsqueeze(2)
    return g
def make_full_graph(g):
    """Build the fully-connected counterpart of ``g`` on the same node set.

    The result is ``dgl.from_networkx(nx.complete_graph(n))`` with ``n`` the
    number of nodes of ``g``. Node data ``'feat'`` is copied over, as are the
    Laplacian encodings ``'EigVecs'`` / ``'EigVals'`` when ``g`` carries them.

    Edge data on the returned graph (both ``torch.long``):
        'feat' : 1 on edges that also exist in ``g``, 0 elsewhere.
        'real' : 1 on edges that also exist in ``g``, 0 elsewhere.

    Parameters
    ----------
    g : dgl.DGLGraph
        Sparse input graph with ``ndata['feat']`` and ``edata['feat']`` set.

    Returns
    -------
    dgl.DGLGraph
        A new graph; ``g`` itself is not modified.
    """
    full_g = dgl.from_networkx(nx.complete_graph(g.number_of_nodes()))

    # Here we copy over the node feature data and laplace encodings
    full_g.ndata['feat'] = g.ndata['feat']
    # The encodings are optional. Test for each key explicitly instead of
    # wrapping the copy in a bare `except`, which would also hide real errors.
    for key in ('EigVecs', 'EigVals'):
        if key in g.ndata:
            full_g.ndata[key] = g.ndata[key]

    # Populate edge features w/ 0s
    n_full_edges = full_g.number_of_edges()
    full_g.edata['feat'] = torch.zeros(n_full_edges, dtype=torch.long)
    full_g.edata['real'] = torch.zeros(n_full_edges, dtype=torch.long)

    # Mark the edges that exist in the original graph
    src, dst = g.edges(form='uv')
    src, dst = src.tolist(), dst.tolist()
    if src:  # nothing to mark when the input graph has no edges
        n_real_edges = g.edata['feat'].shape[0]
        full_g.edges[src, dst].data['feat'] = torch.ones(n_real_edges, dtype=torch.long)
        full_g.edges[src, dst].data['real'] = torch.ones(n_real_edges, dtype=torch.long)

    return full_g
def add_edge_laplace_feats(g):
    """Turn the node-level Laplacian encodings of ``g`` into edge features.

    For every edge (source -> dest) the following edge data is written, each
    with a trailing singleton dimension added by ``unsqueeze(2)``:
        'diff'    : |EigVecs[source] - EigVecs[dest]|
        'product' : EigVecs[source] * EigVecs[dest]   (element-wise)
        'EigVals' : the eigenvalue vector read from node 0, repeated per edge

    The node data entries 'EigVecs' and 'EigVals' are deleted afterwards.
    Returns the same graph object ``g``.
    """
    # All nodes hold the same eigenvalues, so node 0 is representative.
    spectrum = g.ndata['EigVals'][0].flatten()

    src, dst = g.find_edges(g.edges(form='eid'))
    vecs_at_src = g.nodes[src].data['EigVecs']
    vecs_at_dst = g.nodes[dst].data['EigVecs']

    # Compute diffusion distances and Green function
    g.edata['diff'] = (vecs_at_src - vecs_at_dst).abs().unsqueeze(2)
    g.edata['product'] = (vecs_at_src * vecs_at_dst).unsqueeze(2)
    g.edata['EigVals'] = spectrum.repeat(g.number_of_edges(), 1).unsqueeze(2)

    # No longer need EigVecs and EigVals stored as node features
    for stale_key in ('EigVecs', 'EigVals'):
        del g.ndata[stale_key]

    return g
class SBMsDataset(torch.utils.data.Dataset):
    """Pre-built SBM dataset with helpers that post-process every split in place."""

    def __init__(self, name):
        """
            Loading SBM datasets

            Reads `data/SBMs/<name>.pkl`, whose first three entries are used
            as the train, val and test splits (in that order).
        """
        t_begin = time.time()
        print("[I] Loading dataset %s..." % (name))
        self.name = name
        data_dir = 'data/SBMs/'
        # NOTE(review): unpickling executes arbitrary code; only load trusted files.
        with open(data_dir + name + '.pkl', "rb") as handle:
            splits = pickle.load(handle)
        self.train = splits[0]
        self.val = splits[1]
        self.test = splits[2]
        print('train, test, val sizes :',len(self.train),len(self.test),len(self.val))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time()-t_begin))

    def collate(self, samples):
        """Batch (graph, label) pairs: labels are concatenated and cast to long,
        graphs are merged with `dgl.batch`."""
        graphs, labels = (list(column) for column in zip(*samples))
        stacked_labels = torch.cat(labels).long()
        merged_graph = dgl.batch(graphs)
        return merged_graph, stacked_labels

    def _laplace_decomp(self, max_freqs):
        """Run `laplace_decomp` on every graph of the train, val and test splits."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [laplace_decomp(graph, max_freqs) for graph in split.graph_lists]

    def _make_full_graph(self):
        """Replace every graph of every split by its `make_full_graph` result."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [make_full_graph(graph) for graph in split.graph_lists]

    def _add_edge_laplace_feats(self):
        """Run `add_edge_laplace_feats` on every graph of every split."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [add_edge_laplace_feats(graph) for graph in split.graph_lists]
================================================
FILE: data/data.py
================================================
"""
File to load dataset based on user control from main file
"""
from data.molecules import MoleculeDataset
from data.SBMs import SBMsDataset
from data.molhiv import MolHIVDataset
from data.molpcba import MolPCBADataset
def LoadData(DATASET_NAME):
    """
        This function is called in the main_xx.py file

        parameters:
        ; DATASET_NAME : one of 'ZINC', 'SBM_CLUSTER', 'SBM_PATTERN',
                         'MOL-HIV' or 'MOL-PCBA' (exact, case-sensitive match)

        returns:
        ; dataset object

        raises:
        ; ValueError when DATASET_NAME is none of the names above. Previously
          the function fell off the end and returned None, which only failed
          later at the first attribute access on the "dataset".
    """
    # handling for (ZINC) molecule dataset
    if DATASET_NAME == 'ZINC':
        return MoleculeDataset(DATASET_NAME)

    # handling for SBM datasets
    SBM_DATASETS = ['SBM_CLUSTER', 'SBM_PATTERN']
    if DATASET_NAME in SBM_DATASETS:
        return SBMsDataset(DATASET_NAME)

    # handling for the molecular property datasets
    if DATASET_NAME == 'MOL-HIV':
        return MolHIVDataset(DATASET_NAME)

    if DATASET_NAME == 'MOL-PCBA':
        return MolPCBADataset(DATASET_NAME)

    supported = ['ZINC'] + SBM_DATASETS + ['MOL-HIV', 'MOL-PCBA']
    raise ValueError(
        "Unknown dataset name %r; expected one of: %s"
        % (DATASET_NAME, ', '.join(supported))
    )
================================================
FILE: data/molecules.py
================================================
import torch
import pickle
import torch.utils.data
import time
import os
import numpy as np
import csv
import dgl
import torch.nn.functional as F
from scipy import sparse as sp
import numpy as np
import networkx as nx
import hashlib
class MoleculeDGL(torch.utils.data.Dataset):
    """One split of the molecule data, unpickled from disk and rebuilt as DGL graphs.

    ``<data_dir>/<split>.pickle`` holds a list of molecule dict objects with
    the following entries (molecule = data[idx]):
        ; molecule['num_atom'] : nb of atoms, an integer (N)
        ; molecule['atom_type'] : tensor of size N, each element is an atom type, an integer between 0 and num_atom_type
        ; molecule['bond_type'] : tensor of size N x N, each element is a bond type, an integer between 0 and num_bond_type
        ; molecule['logP_SA_cycle_normalized'] : the chemical property to regress, a float variable

    When ``num_graphs`` is 10000 or 1000 the list is sub-sampled with the
    indices stored in ``<data_dir>/<split>.index`` and the resulting size is
    asserted to equal ``num_graphs``. For any other value (including the
    default ``None``) the whole pickle is used.
    """

    def __init__(self, data_dir, split, num_graphs=None):
        self.data_dir = data_dir
        self.split = split
        self.num_graphs = num_graphs

        # NOTE(review): unpickling executes arbitrary code; only load trusted files.
        with open(data_dir + "/%s.pickle" % self.split, "rb") as f:
            self.data = pickle.load(f)

        if self.num_graphs in [10000, 1000]:
            # loading the sampled indices from file <data_dir>/<split>.index
            with open(data_dir + "/%s.index" % self.split, "r") as f:
                data_idx = [list(map(int, idx)) for idx in csv.reader(f)]
                self.data = [self.data[i] for i in data_idx[0]]

            assert len(self.data) == num_graphs, "Sample num_graphs again; available idx: train/val/test => 10k/1k/1k"

        self.graph_lists = []
        self.graph_labels = []
        self.n_samples = len(self.data)
        self._prepare()

    def _prepare(self):
        """Convert every molecule dict into a DGLGraph plus its regression target."""
        # Report the number of molecules actually loaded. Formatting
        # self.num_graphs with %d raised TypeError for the default None.
        print("preparing %d graphs for the %s set..." % (self.n_samples, self.split.upper()))

        for molecule in self.data:
            node_features = molecule['atom_type'].long()

            adj = molecule['bond_type']
            edge_list = (adj != 0).nonzero()  # converting adj matrix to edge_list

            # Bond type of every listed edge, flattened to one value per edge
            edge_idxs_in_adj = edge_list.split(1, dim=1)
            edge_features = adj[edge_idxs_in_adj].reshape(-1).long()

            # Create the DGL Graph
            g = dgl.DGLGraph()
            g.add_nodes(molecule['num_atom'])
            g.ndata['feat'] = node_features

            for src, dst in edge_list.tolist():
                g.add_edges(src, dst)
            g.edata['feat'] = edge_features

            self.graph_lists.append(g)
            self.graph_labels.append(molecule['logP_SA_cycle_normalized'])

    def __len__(self):
        """Return the number of graphs in the dataset."""
        return self.n_samples

    def __getitem__(self, idx):
        """
            Get the idx^th sample.
            Parameters
            ---------
            idx : int
                The sample index.
            Returns
            -------
            (dgl.DGLGraph, label)
                DGLGraph with node feature stored in `feat` field
                And its label, the molecule's 'logP_SA_cycle_normalized' value.
        """
        return self.graph_lists[idx], self.graph_labels[idx]
class MoleculeDatasetDGL(torch.utils.data.Dataset):
    """Container that builds the train / val / test `MoleculeDGL` splits.

    The name 'ZINC-full' selects `./data/molecules/zinc_full` with split sizes
    220011 / 24445 / 5000; any other name selects `./data/molecules` with
    split sizes 10000 / 1000 / 1000.
    """

    def __init__(self, name='Zinc'):
        t0 = time.time()
        self.name = name

        # known meta-info about the zinc dataset; can be calculated as well
        self.num_atom_type = 28
        self.num_bond_type = 4

        if self.name == 'ZINC-full':
            data_dir = './data/molecules/zinc_full'
            n_train, n_val, n_test = 220011, 24445, 5000
        else:
            data_dir = './data/molecules'
            n_train, n_val, n_test = 10000, 1000, 1000

        self.train = MoleculeDGL(data_dir, 'train', num_graphs=n_train)
        self.val = MoleculeDGL(data_dir, 'val', num_graphs=n_val)
        self.test = MoleculeDGL(data_dir, 'test', num_graphs=n_test)

        print("Time taken: {:.4f}s".format(time.time()-t0))
def laplace_decomp(g, max_freqs):
    """Compute a truncated Laplacian eigen-decomposition of ``g`` and store it as node data.

    The matrix decomposed is ``I - N A N`` with ``A`` the adjacency matrix and
    ``N = diag(in_degree.clip(1) ** -0.5)``. Only the first ``max_freqs``
    eigenpairs returned by ``numpy.linalg.eigh`` are kept.

    Writes:
        g.ndata['EigVecs'] -- float tensor (num_nodes, max_freqs); rows are
                              L2-normalised; columns beyond the number of
                              nodes are filled with NaN.
        g.ndata['EigVals'] -- tensor (num_nodes, max_freqs, 1); the sorted
                              absolute eigenvalues replicated for each node,
                              NaN-filled in the same way.

    Returns ``g`` (the same object that was passed in).
    """
    node_count = g.number_of_nodes()
    needs_padding = node_count < max_freqs
    pad_spec = (0, max_freqs - node_count)

    # Laplacian
    adjacency = g.adjacency_matrix_scipy(return_edge_ids=False).astype(float)
    deg_scale = sp.diags(dgl.backend.asnumpy(g.in_degrees()).clip(1) ** -0.5, dtype=float)
    lap = sp.eye(node_count) - deg_scale * adjacency * deg_scale

    # Eigenvectors with numpy; keep up to the maximum desired number of frequencies
    w, v = np.linalg.eigh(lap.toarray())
    w, v = w[:max_freqs], v[:, :max_freqs]

    # Normalize and pad eigenvectors
    eig_vecs = F.normalize(torch.from_numpy(v).float(), p=2, dim=1, eps=1e-12)
    if needs_padding:
        eig_vecs = F.pad(eig_vecs, pad_spec, value=float('nan'))
    g.ndata['EigVecs'] = eig_vecs

    # Abs value is taken because numpy sometimes computes the first eigenvalue
    # approaching 0 from the negative
    eig_vals = torch.from_numpy(np.sort(np.abs(np.real(w))))
    if needs_padding:
        eig_vals = F.pad(eig_vals, pad_spec, value=float('nan'))

    # Save the eigenvalues as node features, one copy per node
    g.ndata['EigVals'] = eig_vals.unsqueeze(0).repeat(node_count, 1).unsqueeze(2)
    return g
def make_full_graph(g):
    """
        Build a fully-connected copy of ``g`` that remembers the real edges.

        The returned graph is created from ``nx.complete_graph`` over the same
        number of nodes. Node features ``'feat'`` are copied, and the Laplacian
        encodings ``'EigVecs'`` / ``'EigVals'`` are copied when ``g`` has them.
        Edge data:

        * ``'feat'``: zeros everywhere, overwritten with ``g.edata['feat']``
          on the edges that exist in ``g``;
        * ``'real'``: 1 on edges that exist in ``g``, 0 elsewhere.
    """
    full_g = dgl.from_networkx(nx.complete_graph(g.number_of_nodes()))

    # Copy over the node feature data and, when available, laplace encodings.
    full_g.ndata['feat'] = g.ndata['feat']

    # The encodings are absent when no Laplacian decomposition was run. Only
    # that missing-key case is tolerated; any other failure (e.g. a shape
    # mismatch) now propagates instead of being silently swallowed.
    try:
        full_g.ndata['EigVecs'] = g.ndata['EigVecs']
        full_g.ndata['EigVals'] = g.ndata['EigVals']
    except KeyError:
        pass

    # Populate edge features w/ 0s
    num_full_edges = full_g.number_of_edges()
    full_g.edata['feat'] = torch.zeros(num_full_edges, dtype=torch.long)
    full_g.edata['real'] = torch.zeros(num_full_edges, dtype=torch.long)

    # Endpoints of the real edges, computed once and reused for both writes.
    src, dst = g.edges(form='uv')
    src, dst = src.tolist(), dst.tolist()

    # Copy real edge data over and flag those edges as real.
    full_g.edges[src, dst].data['feat'] = g.edata['feat']
    full_g.edges[src, dst].data['real'] = torch.ones(g.edata['feat'].shape[0], dtype=torch.long)

    return full_g
def add_edge_laplace_feats(g):
    """
        Turn node-level Laplacian encodings into edge-level features.

        For every edge, stores the absolute difference (``'diff'``) and the
        element-wise product (``'product'``) of the endpoint eigenvector rows,
        plus a copy of the eigenvalues (``'EigVals'``), each with a trailing
        singleton axis. The node-level ``'EigVecs'`` / ``'EigVals'`` entries
        are removed afterwards. Returns the same graph.
    """
    # All nodes carry the same eigenvalues, so node 0's copy is used.
    eigvals = g.ndata['EigVals'][0].flatten()

    src_nodes, dst_nodes = g.find_edges(g.edges(form='eid'))
    src_vecs = g.nodes[src_nodes].data['EigVecs']
    dst_vecs = g.nodes[dst_nodes].data['EigVecs']

    # Compute diffusion distances and Green function
    g.edata['diff'] = torch.abs(src_vecs - dst_vecs).unsqueeze(2)
    g.edata['product'] = torch.mul(src_vecs, dst_vecs).unsqueeze(2)
    g.edata['EigVals'] = eigvals.repeat(g.number_of_edges(), 1).unsqueeze(2)

    # No longer need EigVecs and EigVals stored as node features
    for key in ('EigVecs', 'EigVals'):
        del g.ndata[key]

    return g
class MoleculeDataset(torch.utils.data.Dataset):
    """Pre-pickled molecule dataset with train/val/test splits and batching helpers."""

    def __init__(self, name):
        """
            Load ``data/molecules/<name>.pkl``.

            The pickle is indexed positionally: entries 0-2 are the train, val
            and test splits, entries 3 and 4 the atom-type and bond-type counts.
        """
        start = time.time()
        print("[I] Loading dataset %s..." % (name))
        self.name = name
        data_dir = 'data/molecules/'
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # dataset files obtained from a trusted source.
        with open(data_dir+name+'.pkl',"rb") as handle:
            payload = pickle.load(handle)
        fields = ('train', 'val', 'test', 'num_atom_type', 'num_bond_type')
        for position, attr in enumerate(fields):
            setattr(self, attr, payload[position])
        print('train, test, val sizes :',len(self.train),len(self.test),len(self.val))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time()-start))

    def collate(self, samples):
        """Batch a list of ``(graph, label)`` pairs into one graph and a column of labels."""
        graphs, labels = (list(column) for column in zip(*samples))
        labels = torch.tensor(np.array(labels)).unsqueeze(1)
        return dgl.batch(graphs), labels

    def _laplace_decomp(self, max_freqs):
        """Run ``laplace_decomp`` on every graph of every split (train, val, test)."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [laplace_decomp(graph, max_freqs) for graph in split.graph_lists]

    def _make_full_graph(self):
        """Replace every graph of every split by its ``make_full_graph`` result."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [make_full_graph(graph) for graph in split.graph_lists]

    def _add_edge_laplace_feats(self):
        """Run ``add_edge_laplace_feats`` on every graph of every split."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [add_edge_laplace_feats(graph) for graph in split.graph_lists]
================================================
FILE: data/molhiv.py
================================================
import torch
import pickle
import torch.utils.data
import time
import os
import numpy as np
import csv
import dgl
import torch.nn.functional as F
from scipy import sparse as sp
import numpy as np
import networkx as nx
import hashlib
def laplace_decomp(graph, max_freqs):
    """
        Attach Laplacian eigenvectors/eigenvalues to the graph of a sample.

        ``graph`` is a ``(g, label)`` pair. The Laplacian is ``I - N A N``
        where ``N`` is the diagonal matrix of in-degrees (clipped to at least
        1) raised to the power -0.5. At most ``max_freqs`` eigenpairs are
        kept; when the graph has fewer nodes than ``max_freqs`` the missing
        entries are padded with NaN.

        Writes ``g.ndata['EigVecs']`` (rows L2-normalised) and
        ``g.ndata['EigVals']`` (eigenvalues repeated for every node, with a
        trailing singleton axis) and returns ``(g, label)``.
    """
    g, label = graph
    num_nodes = g.number_of_nodes()

    # Laplacian
    adjacency = g.adjacency_matrix_scipy(return_edge_ids=False).astype(float)
    clipped_in_deg = dgl.backend.asnumpy(g.in_degrees()).clip(1)
    inv_sqrt_deg = sp.diags(clipped_in_deg ** -0.5, dtype=float)
    laplacian = sp.eye(num_nodes) - inv_sqrt_deg * adjacency * inv_sqrt_deg

    # Dense symmetric eigendecomposition; keep the first `max_freqs` pairs.
    eigvals, eigvecs = np.linalg.eigh(laplacian.toarray())
    eigvals = eigvals[:max_freqs]
    eigvecs = eigvecs[:, :max_freqs]

    missing = max_freqs - num_nodes  # > 0 when padding is required

    # Row-normalise the eigenvector matrix, then pad with NaN if needed.
    eigvecs = F.normalize(torch.from_numpy(eigvecs).float(), p=2, dim=1, eps=1e-12, out=None)
    if missing > 0:
        eigvecs = F.pad(eigvecs, (0, missing), value=float('nan'))
    g.ndata['EigVecs'] = eigvecs

    # Abs value is taken because numpy sometimes computes the first
    # eigenvalue approaching 0 from the negative side.
    eigvals = torch.from_numpy(np.sort(np.abs(np.real(eigvals))))
    if missing > 0:
        eigvals = F.pad(eigvals, (0, missing), value=float('nan'))
    eigvals = eigvals.unsqueeze(0)

    # Every node carries the same copy of the eigenvalues.
    g.ndata['EigVals'] = eigvals.repeat(g.number_of_nodes(), 1).unsqueeze(2)

    return g, label
def make_full_graph(graph):
    """
        Build a fully-connected copy of a sample's graph, flagging real edges.

        ``graph`` is a ``(g, label)`` pair. The new graph is created from
        ``nx.complete_graph`` over the same number of nodes. Node features
        ``'feat'`` are copied, and the Laplacian encodings ``'EigVecs'`` /
        ``'EigVals'`` are copied when ``g`` has them. Edge data:

        * ``'feat'``: zeros of width 3, overwritten with ``g.edata['feat']``
          on the edges that exist in ``g``;
        * ``'real'``: 1 on edges that exist in ``g``, 0 elsewhere.

        Returns ``(full_g, label)``.
    """
    g, label = graph

    full_g = dgl.from_networkx(nx.complete_graph(g.number_of_nodes()))

    # Copy over the node feature data and, when available, laplace eigvals/eigvecs.
    full_g.ndata['feat'] = g.ndata['feat']

    # The encodings are absent when no Laplacian decomposition was run. Only
    # that missing-key case is tolerated; any other failure (e.g. a shape
    # mismatch) now propagates instead of being silently swallowed.
    try:
        full_g.ndata['EigVecs'] = g.ndata['EigVecs']
        full_g.ndata['EigVals'] = g.ndata['EigVals']
    except KeyError:
        pass

    # Initialize fake edge features w/ 0s
    num_full_edges = full_g.number_of_edges()
    full_g.edata['feat'] = torch.zeros(num_full_edges, 3, dtype=torch.long)
    full_g.edata['real'] = torch.zeros(num_full_edges, dtype=torch.long)

    # Endpoints of the real edges, computed once and reused for both writes.
    src, dst = g.edges(form='uv')
    src, dst = src.tolist(), dst.tolist()

    # Copy real edge data over, and identify real edges!
    full_g.edges[src, dst].data['feat'] = g.edata['feat']
    full_g.edges[src, dst].data['real'] = torch.ones(
        g.edata['feat'].shape[0], dtype=torch.long)  # This indicates real edges

    return full_g, label
def add_edge_laplace_feats(graph):
    """
        Turn node-level Laplacian encodings into edge-level features.

        ``graph`` is a ``(g, label)`` pair. For every edge, stores the absolute
        difference (``'diff'``) and the element-wise product (``'product'``)
        of the endpoint eigenvector rows, plus a copy of the eigenvalues
        (``'EigVals'``), each with a trailing singleton axis. The node-level
        ``'EigVecs'`` / ``'EigVals'`` entries are removed afterwards.
        Returns ``(g, label)``.
    """
    g, label = graph

    # All nodes carry the same eigenvalues, so node 0's copy is used.
    eigvals = g.ndata['EigVals'][0].flatten()

    src_nodes, dst_nodes = g.find_edges(g.edges(form='eid'))
    src_vecs = g.nodes[src_nodes].data['EigVecs']
    dst_vecs = g.nodes[dst_nodes].data['EigVecs']

    # Compute diffusion differences and Green function
    g.edata['diff'] = torch.abs(src_vecs - dst_vecs).unsqueeze(2)
    g.edata['product'] = torch.mul(src_vecs, dst_vecs).unsqueeze(2)
    g.edata['EigVals'] = eigvals.repeat(g.number_of_edges(), 1).unsqueeze(2)

    # No longer need EigVecs and EigVals stored as node features
    for key in ('EigVecs', 'EigVals'):
        del g.ndata[key]

    return g, label
from ogb.graphproppred import DglGraphPropPredDataset, collate_dgl
class MolHIVDataset(torch.utils.data.Dataset):
    """OGB ``ogbg-molhiv`` dataset with its train/valid/test index split."""

    def __init__(self, name):
        """
            Load the ``ogbg-molhiv`` dataset and slice it into splits.

            ``name`` is only logged and stored on the instance; the OGB
            dataset identifier passed to the loader is fixed.
        """
        start = time.time()
        print("[I] Loading dataset %s..." % (name))
        self.name = name

        dataset = DglGraphPropPredDataset(name='ogbg-molhiv')
        split_idx = dataset.get_idx_split()

        # OGB calls the validation split "valid"; it is exposed here as `val`.
        for attr, key in (('train', 'train'), ('val', 'valid'), ('test', 'test')):
            setattr(self, attr, dataset[split_idx[key]])

        print('train, test, val sizes :', len(self.train), len(self.test), len(self.val))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time() - start))

    def collate(self, samples):
        """Batch a list of ``(graph, label)`` pairs into one graph and stacked labels."""
        graphs, labels = (list(column) for column in zip(*samples))
        batched_graph = dgl.batch(graphs)
        return batched_graph, torch.stack(labels)

    def _laplace_decomp(self, max_freqs):
        """Replace each split by a list of ``laplace_decomp`` results (train, val, test order)."""
        for split in ('train', 'val', 'test'):
            setattr(self, split, [laplace_decomp(sample, max_freqs) for sample in getattr(self, split)])

    def _make_full_graph(self):
        """Replace each split by a list of ``make_full_graph`` results."""
        for split in ('train', 'val', 'test'):
            setattr(self, split, [make_full_graph(sample) for sample in getattr(self, split)])

    def _add_edge_laplace_feats(self):
        """Replace each split by a list of ``add_edge_laplace_feats`` results."""
        for split in ('train', 'val', 'test'):
            setattr(self, split, [add_edge_laplace_feats(sample) for sample in getattr(self, split)])
================================================
FILE: data/molpcba.py
================================================
import torch
import pickle
import torch.utils.data
import time
import os
import numpy as np
import csv
import dgl
import torch.nn.functional as F
from scipy import sparse as sp
import numpy as np
import networkx as nx
import hashlib
from tqdm.std import tqdm
def laplace_decomp(graph, max_freqs):
    """
        Attach Laplacian eigenvectors/eigenvalues to the graph of a sample.

        ``graph`` is a ``(g, label)`` pair. The Laplacian is ``I - N A N``
        where ``N`` is the diagonal matrix of in-degrees (clipped to at least
        1) raised to the power -0.5. The ``max_freqs`` eigenpairs that come
        first in ``argsort`` order are kept (real part of the eigenvectors);
        when the graph has fewer nodes than ``max_freqs`` the missing entries
        are padded with NaN.

        Writes ``g.ndata['EigVecs']`` (rows L2-normalised) and
        ``g.ndata['EigVals']`` (eigenvalues repeated for every node, with a
        trailing singleton axis) and returns ``(g, label)``.
    """
    g, label = graph
    num_nodes = g.number_of_nodes()

    # Laplacian
    adjacency = g.adjacency_matrix_scipy(return_edge_ids=False).astype(float)
    clipped_in_deg = dgl.backend.asnumpy(g.in_degrees()).clip(1)
    inv_sqrt_deg = sp.diags(clipped_in_deg ** -0.5, dtype=float)
    laplacian = sp.eye(num_nodes) - inv_sqrt_deg * adjacency * inv_sqrt_deg

    # General eigendecomposition; keep up to the maximum desired number of frequencies.
    eigvals, eigvecs = np.linalg.eig(laplacian.toarray())
    keep = eigvals.argsort()[0:max_freqs]
    eigvals = eigvals[keep]
    eigvecs = np.real(eigvecs[:, keep])

    # Re-order the kept eigenvector columns by the kept eigenvalues (increasing).
    eigvecs = eigvecs[:, eigvals.argsort()]

    missing = max_freqs - num_nodes  # > 0 when padding is required

    # Row-normalise the eigenvector matrix, then pad with NaN if needed.
    eigvecs = F.normalize(torch.from_numpy(eigvecs).float(), p=2, dim=1, eps=1e-12, out=None)
    if missing > 0:
        eigvecs = F.pad(eigvecs, (0, missing), value=float('nan'))
    g.ndata['EigVecs'] = eigvecs

    # Abs value is taken because numpy sometimes computes the first
    # eigenvalue approaching 0 from the negative side.
    eigvals = torch.from_numpy(np.sort(np.abs(np.real(eigvals))))
    if missing > 0:
        eigvals = F.pad(eigvals, (0, missing), value=float('nan'))
    eigvals = eigvals.unsqueeze(0)

    # Every node carries the same copy of the eigenvalues.
    g.ndata['EigVals'] = eigvals.repeat(g.number_of_nodes(), 1).unsqueeze(2)

    return g, label
def make_full_graph(graph):
    """
        Build a fully-connected copy of a sample's graph, flagging real edges.

        ``graph`` is a ``(g, label)`` pair. The new graph is created from
        ``nx.complete_graph`` over the same number of nodes. Node features
        ``'feat'`` are copied, and the Laplacian encodings ``'EigVecs'`` /
        ``'EigVals'`` are copied when ``g`` has them. Edge data:

        * ``'feat'``: zeros of width 3, overwritten with ``g.edata['feat']``
          on the edges that exist in ``g``;
        * ``'real'``: 1 on edges that exist in ``g``, 0 elsewhere.

        Returns ``(full_g, label)``.
    """
    g, label = graph

    full_g = dgl.from_networkx(nx.complete_graph(g.number_of_nodes()))

    # Copy over the node feature data and, when available, laplace eigvals/eigvecs.
    full_g.ndata['feat'] = g.ndata['feat']

    # The encodings are absent when no Laplacian decomposition was run. Only
    # that missing-key case is tolerated; any other failure (e.g. a shape
    # mismatch) now propagates instead of being silently swallowed.
    try:
        full_g.ndata['EigVecs'] = g.ndata['EigVecs']
        full_g.ndata['EigVals'] = g.ndata['EigVals']
    except KeyError:
        pass

    # Initialize fake edge features w/ 0s
    num_full_edges = full_g.number_of_edges()
    full_g.edata['feat'] = torch.zeros(num_full_edges, 3, dtype=torch.long)
    full_g.edata['real'] = torch.zeros(num_full_edges, dtype=torch.long)

    # Endpoints of the real edges, computed once and reused for both writes.
    src, dst = g.edges(form='uv')
    src, dst = src.tolist(), dst.tolist()

    # Copy real edge data over, and identify real edges!
    full_g.edges[src, dst].data['feat'] = g.edata['feat']
    full_g.edges[src, dst].data['real'] = torch.ones(
        g.edata['feat'].shape[0], dtype=torch.long)  # This indicates real edges

    return full_g, label
def add_edge_laplace_feats(graph):
    """
        Turn node-level Laplacian encodings into edge-level features.

        ``graph`` is a ``(g, label)`` pair. For every edge, stores the absolute
        difference (``'diff'``) and the element-wise product (``'product'``)
        of the endpoint eigenvector rows, plus a copy of the eigenvalues
        (``'EigVals'``), each with a trailing singleton axis. The node-level
        ``'EigVecs'`` / ``'EigVals'`` entries are removed afterwards.
        Returns ``(g, label)``.
    """
    g, label = graph

    # All nodes carry the same eigenvalues, so node 0's copy is used.
    eigvals = g.ndata['EigVals'][0].flatten()

    src_nodes, dst_nodes = g.find_edges(g.edges(form='eid'))
    src_vecs = g.nodes[src_nodes].data['EigVecs']
    dst_vecs = g.nodes[dst_nodes].data['EigVecs']

    # Compute diffusion differences and Green function
    g.edata['diff'] = torch.abs(src_vecs - dst_vecs).unsqueeze(2)
    g.edata['product'] = torch.mul(src_vecs, dst_vecs).unsqueeze(2)
    g.edata['EigVals'] = eigvals.repeat(g.number_of_edges(), 1).unsqueeze(2)

    # No longer need EigVecs and EigVals stored as node features
    for key in ('EigVecs', 'EigVals'):
        del g.ndata[key]

    return g, label
from ogb.graphproppred import DglGraphPropPredDataset, collate_dgl
class MolPCBADataset(torch.utils.data.Dataset):
    """OGB ``ogbg-molpcba`` dataset with its train/valid/test index split."""

    def __init__(self, name):
        """
            Load the ``ogbg-molpcba`` dataset and slice it into splits.

            ``name`` is only logged and stored on the instance; the OGB
            dataset identifier passed to the loader is fixed.
        """
        start = time.time()
        print("[I] Loading dataset %s..." % (name))
        self.name = name

        dataset = DglGraphPropPredDataset(name='ogbg-molpcba')
        split_idx = dataset.get_idx_split()

        # OGB calls the validation split "valid"; it is exposed here as `val`.
        for attr, key in (('train', 'train'), ('val', 'valid'), ('test', 'test')):
            setattr(self, attr, dataset[split_idx[key]])

        print('train, test, val sizes :', len(self.train), len(self.test), len(self.val))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time() - start))

    def collate(self, samples):
        """Batch a list of ``(graph, label)`` pairs into one graph and stacked labels."""
        graphs, labels = (list(column) for column in zip(*samples))
        batched_graph = dgl.batch(graphs)
        return batched_graph, torch.stack(labels)

    def _laplace_decomp(self, max_freqs):
        """Replace each split by a list of ``laplace_decomp`` results, with a progress bar per split."""
        for split in ('train', 'val', 'test'):
            setattr(self, split, [laplace_decomp(sample, max_freqs) for sample in tqdm(getattr(self, split))])

    def _make_full_graph(self):
        """Replace each split by a list of ``make_full_graph`` results, with a progress bar per split."""
        for split in ('train', 'val', 'test'):
            setattr(self, split, [make_full_graph(sample) for sample in tqdm(getattr(self, split))])

    def _add_edge_laplace_feats(self):
        """Replace each split by a list of ``add_edge_laplace_feats`` results, with a progress bar per split."""
        for split in ('train', 'val', 'test'):
            setattr(self, split, [add_edge_laplace_feats(sample) for sample in tqdm(getattr(self, split))])
================================================
FILE: data/script_download_SBMs.sh
================================================
# Command to download dataset:
#   bash script_download_SBMs.sh
#
# Downloads the SBM_CLUSTER and SBM_PATTERN pickles into ./SBMs, skipping any
# file that is already present. Safe to re-run.

BASE_URL=https://data.dgl.ai/dataset/benchmarking-gnns

# download FILE: fetch $BASE_URL/FILE into the current directory unless it exists.
# The transfer goes to FILE.part and is renamed only on success, so an
# interrupted or failed download is never mistaken for a finished one.
download() {
    local file=$1
    if test -f "$file"; then
        echo -e "$file already downloaded."
        return 0
    fi
    echo -e "\ndownloading $file..."
    # --fail: do not save an HTTP error page as if it were the dataset.
    # NOTE(review): -k disables TLS certificate verification; the downloaded
    # pickle is later unpickled, so consider dropping -k.
    if curl "$BASE_URL/$file" -o "$file.part" -J -L -k --fail; then
        mv "$file.part" "$file"
    else
        rm -f "$file.part"
        echo "failed to download $file" >&2
        return 1
    fi
}

# -p: do not complain when the directory already exists (re-runs).
mkdir -p SBMs
# Abort rather than download into the wrong directory.
cd SBMs || exit 1

download SBM_CLUSTER.pkl
download SBM_PATTERN.pkl
================================================
FILE: data/script_download_all_datasets.sh
================================================
# Command to download dataset:
#   bash script_download_all_datasets.sh
#
# Downloads ZINC into ./molecules and the SBM_CLUSTER / SBM_PATTERN pickles
# into ./SBMs, skipping any file that is already present. Safe to re-run.

BASE_URL=https://data.dgl.ai/dataset/benchmarking-gnns

# download FILE: fetch $BASE_URL/FILE into the current directory unless it exists.
# The transfer goes to FILE.part and is renamed only on success, so an
# interrupted or failed download is never mistaken for a finished one.
download() {
    local file=$1
    if test -f "$file"; then
        echo -e "$file already downloaded."
        return 0
    fi
    echo -e "\ndownloading $file..."
    # --fail: do not save an HTTP error page as if it were the dataset.
    # NOTE(review): -k disables TLS certificate verification; the downloaded
    # pickle is later unpickled, so consider dropping -k.
    if curl "$BASE_URL/$file" -o "$file.part" -J -L -k --fail; then
        mv "$file.part" "$file"
    else
        rm -f "$file.part"
        echo "failed to download $file" >&2
        return 1
    fi
}

############
# ZINC
############

# -p: do not complain when the directory already exists (re-runs).
mkdir -p molecules
# Abort rather than download into the wrong directory.
cd molecules || exit 1

download ZINC.pkl

cd ..

############
# PATTERN and CLUSTER
############

mkdir -p SBMs
cd SBMs || exit 1

download SBM_CLUSTER.pkl
download SBM_PATTERN.pkl

cd ..
================================================
FILE: data/script_download_molecules.sh
================================================
# Command to download dataset:
#   bash script_download_molecules.sh
#
# Downloads ZINC.pkl into ./molecules unless it is already present.
# Safe to re-run.

BASE_URL=https://data.dgl.ai/dataset/benchmarking-gnns

# download FILE: fetch $BASE_URL/FILE into the current directory unless it exists.
# The transfer goes to FILE.part and is renamed only on success, so an
# interrupted or failed download is never mistaken for a finished one.
download() {
    local file=$1
    if test -f "$file"; then
        echo -e "$file already downloaded."
        return 0
    fi
    echo -e "\ndownloading $file..."
    # --fail: do not save an HTTP error page as if it were the dataset.
    # NOTE(review): -k disables TLS certificate verification; the downloaded
    # pickle is later unpickled, so consider dropping -k.
    if curl "$BASE_URL/$file" -o "$file.part" -J -L -k --fail; then
        mv "$file.part" "$file"
    else
        rm -f "$file.part"
        echo "failed to download $file" >&2
        return 1
    fi
}

# -p: do not complain when the directory already exists (re-runs).
mkdir -p molecules/
# Abort rather than download into the wrong directory.
cd molecules || exit 1

download ZINC.pkl
================================================
FILE: layers/graph_transformer_layer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import dgl.function as fn
import numpy as np
"""
Graph Transformer Layer
"""
"""
Util functions
"""
def src_dot_dst(src_field, dst_field, out_field):
    """
        Build an edge function that multiplies, element-wise, the source-node
        field ``src_field`` with the destination-node field ``dst_field`` and
        returns the product under the key ``out_field``.
    """
    def _per_edge(edges):
        from_src = edges.src[src_field]
        from_dst = edges.dst[dst_field]
        return {out_field: (from_src * from_dst)}
    return _per_edge
def scaling(field, scale_constant):
    """
        Build an edge function that divides the edge field ``field`` by
        ``scale_constant`` and returns the result under the same key.
    """
    def _per_edge(edges):
        unscaled = edges.data[field]
        return {field: (unscaled / scale_constant)}
    return _per_edge
# Improving implicit attention scores with explicit edge features, if available
def imp_exp_attn(implicit_attn, explicit_edge):
    """
        Build an edge function that modulates attention scores by edge features.

        implicit_attn: name of the edge field holding the output of K Q
        explicit_edge: name of the edge field holding the explicit edge features

        The element-wise product is returned under the ``implicit_attn`` key.
    """
    def _per_edge(edges):
        scores = edges.data[implicit_attn]
        edge_feats = edges.data[explicit_edge]
        return {implicit_attn: (scores * edge_feats)}
    return _per_edge
def exp_real(field, L):
    """
        Build an edge function producing un-normalised softmax weights for
        real edges: ``exp(clamp(sum over last axis of field)) / (L + 1)``,
        returned under the key ``'score_soft'``.
    """
    def _per_edge(edges):
        summed = edges.data[field].sum(-1, keepdim=True)
        # clamp for softmax numerical stability
        bounded = summed.clamp(-5, 5)
        return {'score_soft': torch.exp(bounded)/(L+1)}
    return _per_edge
def exp_fake(field, L):
    """
        Build an edge function producing un-normalised softmax weights for
        fake edges: ``L * exp(clamp(sum over last axis of field)) / (L + 1)``,
        returned under the key ``'score_soft'``.
    """
    def _per_edge(edges):
        summed = edges.data[field].sum(-1, keepdim=True)
        # clamp for softmax numerical stability
        bounded = summed.clamp(-5, 5)
        return {'score_soft': L*torch.exp(bounded)/(L+1)}
    return _per_edge
def exp(field):
    """
        Build an edge function producing un-normalised softmax weights:
        ``exp(clamp(sum over last axis of field))``, returned under the key
        ``'score_soft'``.
    """
    def _per_edge(edges):
        summed = edges.data[field].sum(-1, keepdim=True)
        # clamp for softmax numerical stability
        bounded = summed.clamp(-5, 5)
        return {'score_soft': torch.exp(bounded)}
    return _per_edge
"""
Multi-Head Attention Layer
"""
class MultiHeadAttentionLayer(nn.Module):
    """
        Multi-head attention over the edges of a graph.

        With ``full_graph`` enabled, edges flagged ``g.edata['real'] != 0``
        use the ``Q``/``K``/``E`` projections and all other edges use the
        separate ``Q_2``/``K_2``/``E_2`` projections; their un-normalised
        weights are scaled by ``1/(gamma+1)`` and ``gamma/(gamma+1)``
        respectively. Otherwise every edge is treated as real.

        Param:
            gamma: weighting between real and fake edges (full graph only)
            in_dim: input feature size of nodes and edges
            out_dim: feature size per head
            num_heads: number of attention heads
            full_graph: whether the input graphs carry the 'real' edge flag
            use_bias: whether the linear projections have a bias term
    """

    def __init__(self, gamma, in_dim, out_dim, num_heads, full_graph, use_bias):
        super().__init__()

        self.out_dim = out_dim
        self.num_heads = num_heads
        self.gamma = gamma
        self.full_graph = full_graph

        def projection():
            return nn.Linear(in_dim, out_dim * num_heads, bias=bool(use_bias))

        # Creation order (Q, K, E, [Q_2, K_2, E_2], V) is kept as before so
        # parameter ordering and seeded initialisation do not change.
        self.Q = projection()
        self.K = projection()
        self.E = projection()

        if self.full_graph:
            self.Q_2 = projection()
            self.K_2 = projection()
            self.E_2 = projection()

        self.V = projection()

    def propagate_attention(self, g):
        """
            Compute attention weights on the edges of ``g`` and aggregate.

            Reads the projected fields written by ``forward`` and writes
            ``g.ndata['wV']`` (weighted sum of values) and ``g.ndata['z']``
            (sum of weights) on the destination nodes.
        """
        if self.full_graph:
            real_flag = g.edata['real']
            # squeeze(-1) keeps the id tensors 1-D even when exactly one edge
            # matches; a bare squeeze() would collapse that to a 0-dim tensor.
            real_ids = torch.nonzero(real_flag).squeeze(-1)
            fake_ids = torch.nonzero(real_flag == 0).squeeze(-1)
        else:
            real_ids = g.edges(form='eid')

        g.apply_edges(src_dot_dst('K_h', 'Q_h', 'score'), edges=real_ids)

        if self.full_graph:
            g.apply_edges(src_dot_dst('K_2h', 'Q_2h', 'score'), edges=fake_ids)

        # scale scores by sqrt(d)
        g.apply_edges(scaling('score', np.sqrt(self.out_dim)))

        # Use available edge features to modify the scores for edges
        g.apply_edges(imp_exp_attn('score', 'E'), edges=real_ids)

        if self.full_graph:
            g.apply_edges(imp_exp_attn('score', 'E_2'), edges=fake_ids)

        if self.full_graph:
            # softmax and scaling by gamma
            L = self.gamma
            g.apply_edges(exp_real('score', L), edges=real_ids)
            g.apply_edges(exp_fake('score', L), edges=fake_ids)
        else:
            g.apply_edges(exp('score'), edges=real_ids)

        # Send weighted values to target nodes
        eids = g.edges()
        g.send_and_recv(eids, fn.src_mul_edge('V_h', 'score_soft', 'V_h'), fn.sum('V_h', 'wV'))
        g.send_and_recv(eids, fn.copy_edge('score_soft', 'score_soft'), fn.sum('score_soft', 'z'))

    def forward(self, g, h, e):
        """
            Project node features ``h`` and edge features ``e``, run attention
            on ``g`` and return ``wV / (z + 1e-6)`` per node and head.
        """
        Q_h = self.Q(h)
        K_h = self.K(h)
        E = self.E(e)

        if self.full_graph:
            Q_2h = self.Q_2(h)
            K_2h = self.K_2(h)
            E_2 = self.E_2(e)

        V_h = self.V(h)

        # Reshaping into [num_nodes, num_heads, feat_dim] to
        # get projections for multi-head attention
        per_head = (-1, self.num_heads, self.out_dim)
        g.ndata['Q_h'] = Q_h.view(*per_head)
        g.ndata['K_h'] = K_h.view(*per_head)
        g.edata['E'] = E.view(*per_head)

        if self.full_graph:
            g.ndata['Q_2h'] = Q_2h.view(*per_head)
            g.ndata['K_2h'] = K_2h.view(*per_head)
            g.edata['E_2'] = E_2.view(*per_head)

        g.ndata['V_h'] = V_h.view(*per_head)

        self.propagate_attention(g)

        # The small constant guards against division by zero for nodes whose
        # summed weight is zero.
        h_out = g.ndata['wV'] / (g.ndata['z'] + 1e-6)

        return h_out
class GraphTransformerLayer(nn.Module):
    """
        One transformer block on a graph: multi-head attention followed by a
        two-layer feed-forward network, each with optional residual
        connection, layer norm and batch norm.

        Param:
            gamma, full_graph, use_bias: forwarded to MultiHeadAttentionLayer
            in_dim, out_dim: input / output node feature sizes
            num_heads: number of heads; each head gets out_dim//num_heads features
            dropout: dropout probability applied after attention and inside the FFN
            layer_norm, batch_norm, residual: toggles for the respective steps
    """

    def __init__(self, gamma, in_dim, out_dim, num_heads, full_graph, dropout=0.0, layer_norm=False, batch_norm=True, residual=True, use_bias=False):
        super().__init__()

        self.in_channels = in_dim
        self.out_channels = out_dim
        self.num_heads = num_heads
        self.dropout = dropout
        self.residual = residual
        self.layer_norm = layer_norm
        self.batch_norm = batch_norm

        head_dim = out_dim//num_heads
        self.attention = MultiHeadAttentionLayer(gamma, in_dim, head_dim, num_heads, full_graph, use_bias)

        self.O_h = nn.Linear(out_dim, out_dim)

        if self.layer_norm:
            self.layer_norm1_h = nn.LayerNorm(out_dim)

        if self.batch_norm:
            self.batch_norm1_h = nn.BatchNorm1d(out_dim)

        # FFN for h
        self.FFN_h_layer1 = nn.Linear(out_dim, out_dim*2)
        self.FFN_h_layer2 = nn.Linear(out_dim*2, out_dim)

        if self.layer_norm:
            self.layer_norm2_h = nn.LayerNorm(out_dim)

        if self.batch_norm:
            self.batch_norm2_h = nn.BatchNorm1d(out_dim)

    def _add_and_norm(self, skip, update, stage):
        """Apply the optional residual, layer norm and batch norm of sub-block ``stage`` (1 or 2)."""
        out = skip + update if self.residual else update
        if self.layer_norm:
            out = getattr(self, 'layer_norm%d_h' % stage)(out)
        if self.batch_norm:
            out = getattr(self, 'batch_norm%d_h' % stage)(out)
        return out

    def forward(self, g, h, e):
        """Return the updated node features together with the unchanged edge features ``e``."""
        # Multi-head attention; the per-head outputs are concatenated by the view.
        attended = self.attention(g, h, e).view(-1, self.out_channels)
        attended = F.dropout(attended, self.dropout, training=self.training)
        h = self._add_and_norm(h, self.O_h(attended), 1)

        # FFN for h
        hidden = F.relu(self.FFN_h_layer1(h))
        hidden = F.dropout(hidden, self.dropout, training=self.training)
        h = self._add_and_norm(h, self.FFN_h_layer2(hidden), 2)

        return h, e

    def __repr__(self):
        template = '{}(in_channels={}, out_channels={}, heads={}, residual={})'
        return template.format(self.__class__.__name__,
                               self.in_channels,
                               self.out_channels, self.num_heads, self.residual)
================================================
FILE: layers/mlp_readout_layer.py
================================================
import torch.nn as nn
import torch.nn.functional as F
"""
MLP Layer used after graph vector representation
"""
class MLPReadout(nn.Module):
    """
        Readout MLP: ``L`` hidden linear layers, each halving the width
        (integer division) and followed by ReLU, then a final linear layer
        mapping to ``output_dim``.
    """

    def __init__(self, input_dim, output_dim, L=2):  # L=nb_hidden_layers
        super().__init__()
        # Layer widths: input_dim, input_dim//2, ..., input_dim//2**L
        widths = [input_dim // 2 ** depth for depth in range(L + 1)]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out, bias=True))
        stack.append(nn.Linear(widths[L], output_dim, bias=True))
        self.FC_layers = nn.ModuleList(stack)
        self.L = L

    def forward(self, x):
        """Apply the hidden layers with ReLU, then the output layer without activation."""
        hidden = x
        for depth in range(self.L):
            hidden = F.relu(self.FC_layers[depth](hidden))
        return self.FC_layers[self.L](hidden)
================================================
FILE: main_SBMs_node_classification.py
================================================
"""
IMPORTING LIBS
"""
import dgl
import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from tqdm import tqdm
class DotDict(dict):
    """A dict whose items are also reachable as attributes (``d.key`` is ``d['key']``)."""

    def __init__(self, **kwds):
        super().__init__(kwds)
        # Sharing storage between the mapping and the attribute namespace is
        # what makes attribute and item access interchangeable.
        self.__dict__ = self
"""
IMPORTING CUSTOM MODULES/METHODS
"""
from nets.SBMs_node_classification.load_net import gnn_model
from data.data import LoadData
"""
GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """
        Select the torch device for the run.

        Sets ``CUDA_DEVICE_ORDER`` and ``CUDA_VISIBLE_DEVICES`` (to
        ``str(gpu_id)``) in the environment, then returns a CUDA device when
        CUDA is available and ``use_gpu`` is truthy, otherwise a CPU device.
    """
    os.environ.update({
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": str(gpu_id),
    })

    if not (torch.cuda.is_available() and use_gpu):
        print('cuda not available')
        return torch.device("cpu")

    print('cuda available with GPU:',torch.cuda.get_device_name(0))
    return torch.device("cuda")
"""
VIEWING ENCODING TYPE AND NUM PARAMS
"""
def view_model_param(LPE, net_params):
    """
        Instantiate the model once to count its parameters.

        Prints the encoding type selected by ``LPE`` ('edge', 'node' or
        anything else for none) together with the total number of parameter
        entries, and returns that total.
    """
    model = gnn_model(LPE, net_params)
    print("MODEL DETAILS:\n")

    total_param = 0
    for tensor in model.parameters():
        total_param += np.prod(list(tensor.data.size()))

    encoding_labels = {
        'edge': 'Edge Laplace Encoding/',
        'node': 'Node Laplace Encoding',
    }
    print('Encoding Type/Total parameters:', encoding_labels.get(LPE, 'None'), total_param)

    return total_param
"""
TRAINING CODE
"""
def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
    """
        Preprocess the dataset, train the model and write the results.

        Parameters
        ----------
        MODEL_NAME : str
            Only used in the written config/result files.
        dataset :
            Object exposing ``name``, ``train``/``val``/``test``, ``collate``
            and the ``_laplace_decomp`` / ``_make_full_graph`` /
            ``_add_edge_laplace_feats`` preprocessing hooks.
        params : dict
            Optimisation settings; reads 'seed', 'init_lr', 'weight_decay',
            'lr_reduce_factor', 'lr_schedule_patience', 'batch_size',
            'epochs', 'min_lr' and 'max_time' (hours).
        net_params : dict
            Model settings; reads 'LPE', 'm', 'full_graph', 'device' and
            'n_classes', and receives 'total_param'.
        dirs : tuple
            ``(root_log_dir, root_ckpt_dir, write_file_name, write_config_file)``.

        Training stops early on Ctrl+C, when the learning rate falls below
        ``params['min_lr']`` or after ``params['max_time']`` hours. Train and
        test accuracy are reported at the epoch with the best validation
        accuracy; they are NaN when no epoch completed.
    """
    start0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    if net_params['LPE'] in ['edge', 'node']:
        st = time.time()
        print("[!] Computing Laplace Decompositions..")
        dataset._laplace_decomp(net_params['m'])
        print('Time taken to decompose Laplacians: ',time.time()-st)

    if net_params['full_graph']:
        st = time.time()
        print("[!] Adding full graph connectivity..")
        dataset._make_full_graph()
        print('Time taken to add full graph connectivity: ',time.time()-st)

    if net_params['LPE'] == 'edge':
        st = time.time()
        print("[!] Computing edge Laplace features..")
        dataset._add_edge_laplace_feats()
        print('Time taken to compute edge Laplace features: ',time.time()-st)

    trainset, valset, testset = dataset.train, dataset.val, dataset.test

    net_params['total_param'] = view_model_param(net_params['LPE'], net_params)

    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs
    device = net_params['device']

    # Write network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n""" .format(DATASET_NAME, MODEL_NAME, params, net_params, net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))
    print("Number of Classes: ", net_params['n_classes'])

    model = gnn_model(net_params['LPE'], net_params)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=params['init_lr'], weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                     factor=params['lr_reduce_factor'],
                                                     patience=params['lr_schedule_patience'],
                                                     verbose=True)

    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_accs, epoch_val_accs, epoch_test_accs = [], [], []

    # import train and evaluate functions
    from train.train_SBMs_node_classification import train_epoch, evaluate_network

    train_loader = DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(valset, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)
    test_loader = DataLoader(testset, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)

    # Keeps `epoch` defined for the final report even if the loop never runs.
    epoch = 0

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs'])) as t:
            for epoch in t:

                start = time.time()

                epoch_train_loss, epoch_train_acc, optimizer = train_epoch(model, optimizer, device, train_loader, epoch, net_params['LPE'])

                epoch_val_loss, epoch_val_acc = evaluate_network(model, device, val_loader, epoch, net_params['LPE'])
                _, epoch_test_acc = evaluate_network(model, device, test_loader, epoch, net_params['LPE'])

                epoch_train_losses.append(epoch_train_loss)
                epoch_val_losses.append(epoch_val_loss)
                epoch_train_accs.append(epoch_train_acc)
                epoch_val_accs.append(epoch_val_acc)
                epoch_test_accs.append(epoch_test_acc)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_acc', epoch_train_acc, epoch)
                writer.add_scalar('val/_acc', epoch_val_acc, epoch)
                writer.add_scalar('test/_acc', epoch_test_acc, epoch)
                writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], epoch)

                t.set_postfix(time=time.time()-start, lr=optimizer.param_groups[0]['lr'],
                              train_loss=epoch_train_loss, val_loss=epoch_val_loss,
                              train_acc=epoch_train_acc, val_acc=epoch_val_acc,
                              test_acc=epoch_test_acc)

                per_epoch_time.append(time.time()-start)

                # Saving checkpoint
                ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")
                # exist_ok avoids the check-then-create race of a manual test.
                os.makedirs(ckpt_dir, exist_ok=True)
                torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Keep only the checkpoints of the two most recent epochs.
                files = glob.glob(ckpt_dir + '/*.pkl')
                for file in files:
                    epoch_nb = file.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch-1:
                        os.remove(file)

                scheduler.step(epoch_val_loss)

                if optimizer.param_groups[0]['lr'] < params['min_lr']:
                    print("\n!! LR SMALLER OR EQUAL TO MIN LR THRESHOLD.")
                    break

                # Stop training after params['max_time'] hours
                if time.time()-start0 > params['max_time']*3600:
                    print('-' * 89)
                    print("Max_time for training elapsed {:.2f} hours, so stopping".format(params['max_time']))
                    break

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early because of KeyboardInterrupt')
    finally:
        # Flush and release the event file even when training raised.
        writer.close()

    # Return test and train metrics at best val metric. When no epoch
    # completed (interrupted immediately, or zero epochs requested) there is
    # nothing to select from, so NaN is reported instead of crashing.
    if epoch_val_accs:
        index = epoch_val_accs.index(max(epoch_val_accs))
        test_acc = epoch_test_accs[index]
        train_acc = epoch_train_accs[index]
    else:
        test_acc = train_acc = float('nan')

    avg_epoch_time = np.mean(per_epoch_time) if per_epoch_time else float('nan')

    print("Test Accuracy: {:.4f}".format(test_acc))
    print("Train Accuracy: {:.4f}".format(train_acc))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time()-start0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(avg_epoch_time))

    """
        Write the results in out_dir/results folder
    """
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
FINAL RESULTS\nTEST ACCURACY: {:.4f}\nTRAIN ACCURACY: {:.4f}\n\n
Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_acc, train_acc, epoch, (time.time()-start0)/3600, avg_epoch_time))
def main():
    """
    Command-line entry point for the SBM node-classification script.

    Reads the JSON file given by --config, layers any command-line
    overrides on top of it, derives the dataset-dependent input/output
    sizes and the output locations, then calls train_val_pipeline.

    Every override flag is optional: a flag that is not given leaves the
    value from the config file untouched. Boolean flags are passed as the
    literal string 'True'; any other string is read as False.

    Exits with status 1 if the effective 'LPE' setting is not one of
    'node', 'edge' or 'none'.
    """
    def as_bool(text):
        # Only the exact string 'True' switches a flag on.
        return text == 'True'

    # (flag name, converter) tables. The flag name doubles as the key that
    # is overwritten in config['params'] / config['net_params'].
    param_overrides = [
        ('seed', int), ('epochs', int), ('batch_size', int),
        ('init_lr', float), ('lr_reduce_factor', float),
        ('lr_schedule_patience', int), ('min_lr', float),
        ('weight_decay', float), ('print_epoch_interval', int),
        ('max_time', float),
    ]
    net_overrides = [
        ('full_graph', as_bool), ('gamma', float), ('m', int), ('LPE', str),
        ('LPE_layers', int), ('LPE_dim', int), ('LPE_n_heads', int),
        ('GT_layers', int), ('GT_hidden_dim', int), ('GT_out_dim', int),
        ('GT_n_heads', int), ('residual', as_bool), ('readout', str),
        ('in_feat_dropout', float), ('dropout', float),
        ('layer_norm', as_bool), ('batch_norm', as_bool),
    ]

    parser = argparse.ArgumentParser()
    # The config file is read unconditionally below, so let argparse report
    # a missing --config instead of failing later inside open().
    parser.add_argument('--config', required=True, help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    parser.add_argument('--out_dir', help="Please give a value for out_dir")
    for name, _ in param_overrides + net_overrides:
        parser.add_argument('--' + name, help="Please give a value for " + name)
    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)

    # device: passing --gpu_id also forces GPU use on
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # model, dataset, out_dir
    MODEL_NAME = args.model if args.model is not None else config['model']
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = LoadData(DATASET_NAME)
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    for name, convert in param_overrides:
        raw = getattr(args, name)
        if raw is not None:
            params[name] = convert(raw)

    # model parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']
    for name, convert in net_overrides:
        raw = getattr(args, name)
        if raw is not None:
            net_params[name] = convert(raw)

    # Validate the effective encoding type (from the command line or the config).
    if net_params['LPE'] not in ['node', 'edge', 'none']:
        print('[!] User did not provide a valid input argument for \'LPE\'. Valid inputs are \'node\', \'edge\', and \'none\'.')
        # Non-zero status so calling scripts can detect the failure.
        raise SystemExit(1)

    # SBM
    net_params['in_dim'] = torch.unique(dataset.train[0][0].ndata['feat'], dim=0).size(0)  # node_dim (feat is an integer)
    net_params['n_classes'] = torch.unique(dataset.train[0][1], dim=0).size(0)

    # Take the timestamp once so all four output names share the same tag.
    stamp = time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    run_tag = MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id']) + "_" + stamp
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs)
# Script entry point: main() is called unconditionally at module level
# (no `if __name__ == "__main__"` guard), so importing this file also runs it.
main()
================================================
FILE: main_ZINC_graph_regression.py
================================================
"""
IMPORTING LIBS
"""
import dgl
import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from tqdm import tqdm
"""
IMPORTING CUSTOM MODULES/METHODS
"""
from nets.ZINC_graph_regression.load_net import gnn_model
from data.data import LoadData
"""
GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """
    Restrict CUDA to one device and pick the torch device to run on.

    Sets CUDA_DEVICE_ORDER and CUDA_VISIBLE_DEVICES in the process
    environment, then returns torch.device("cuda") when CUDA is available
    and use_gpu is truthy, otherwise torch.device("cpu").
    """
    os.environ.update({
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": str(gpu_id),
    })

    cuda_ready = torch.cuda.is_available()
    if not (cuda_ready and use_gpu):
        # This message is also printed when CUDA exists but use_gpu is falsy.
        print('cuda not available')
        return torch.device("cpu")

    print('cuda available with GPU:',torch.cuda.get_device_name(0))
    return torch.device("cuda")
"""
VIEWING ENCODING TYPE AND NUM PARAMS
"""
def view_model_param(LPE, net_params):
    """
    Build a model via gnn_model(LPE, net_params), print its encoding type
    together with its parameter count, and return that count.
    """
    net = gnn_model(LPE, net_params)
    print("MODEL DETAILS:\n")

    # Number of elements per parameter tensor, summed over the whole model.
    total_param = sum(np.prod(list(weights.data.size())) for weights in net.parameters())

    # Anything other than 'edge' / 'node' is reported as 'None'.
    encoding_names = {
        'edge': 'Edge Laplace Encoding/',
        'node': 'Node Laplace Encoding',
    }
    print('Encoding Type/Total parameters:', encoding_names.get(LPE, 'None'), total_param)
    return total_param
"""
TRAINING CODE
"""
def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
    """
    Train a ZINC graph-regression model, evaluating on the validation and
    test splits after every epoch, and write a summary of the run.

    Args:
        MODEL_NAME: name used only in the written config/result files.
        dataset: object exposing .name, .train/.val/.test, .collate and the
            preprocessing hooks _laplace_decomp, _make_full_graph and
            _add_edge_laplace_feats.
        params: optimisation settings; reads 'seed', 'init_lr',
            'weight_decay', 'lr_reduce_factor', 'lr_schedule_patience',
            'batch_size', 'epochs', 'min_lr' and 'max_time' (hours).
        net_params: model settings; reads 'LPE', 'm', 'full_graph' and
            'device', and gets 'total_param' added in place.
        dirs: tuple (root_log_dir, root_ckpt_dir, write_file_name,
            write_config_file).

    Training stops when the epoch budget is used up, the learning rate
    drops below params['min_lr'], params['max_time'] hours have elapsed, or
    the user presses Ctrl+C. The reported test/train MAE are those of the
    epoch with the lowest validation MAE. If no epoch finished at all, the
    function closes the TensorBoard writer and returns without writing a
    results file.
    """
    start0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    # NOTE(review): the three preprocessing steps below are treated as
    # independent checks -- confirm against the original indentation.
    if net_params['LPE'] in ['edge', 'node']:
        st = time.time()
        print("[!] Computing Laplace Decompositions..")
        dataset._laplace_decomp(net_params['m'])
        print('Time taken to decompose Laplacians: ',time.time()-st)

    if net_params['full_graph']:
        st = time.time()
        print("[!] Adding full graph connectivity..")
        dataset._make_full_graph()
        print('Time taken to add full graph connectivity: ',time.time()-st)

    if net_params['LPE'] == 'edge':
        st = time.time()
        print("[!] Computing edge Laplace features..")
        dataset._add_edge_laplace_feats()
        print('Time taken to compute edge Laplace features: ',time.time()-st)

    trainset, valset, testset = dataset.train, dataset.val, dataset.test

    net_params['total_param'] = view_model_param(net_params['LPE'], net_params)

    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs
    device = net_params['device']

    # Write the network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n""" .format(DATASET_NAME, MODEL_NAME, params, net_params, net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))

    model = gnn_model(net_params['LPE'], net_params)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=params['init_lr'], weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                     factor=params['lr_reduce_factor'],
                                                     patience=params['lr_schedule_patience'],
                                                     verbose=True)

    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_MAEs, epoch_val_MAEs, epoch_test_MAEs = [], [], []

    # import train and evaluate functions
    from train.train_ZINC_graph_regression import train_epoch, evaluate_network

    train_loader = DataLoader(trainset, num_workers=4, batch_size=params['batch_size'], shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(valset, num_workers=4, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)
    test_loader = DataLoader(testset, num_workers=4, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs'])) as t:
            for epoch in t:

                t.set_description('Epoch %d' % epoch)

                start = time.time()

                epoch_train_loss, epoch_train_mae, optimizer = train_epoch(model, optimizer, device, train_loader, epoch, net_params['LPE'])

                epoch_val_loss, epoch_val_mae = evaluate_network(model, device, val_loader, epoch, net_params['LPE'])
                _, epoch_test_mae = evaluate_network(model, device, test_loader, epoch, net_params['LPE'])

                epoch_train_losses.append(epoch_train_loss)
                epoch_val_losses.append(epoch_val_loss)
                epoch_train_MAEs.append(epoch_train_mae)
                epoch_val_MAEs.append(epoch_val_mae)
                epoch_test_MAEs.append(epoch_test_mae)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_mae', epoch_train_mae, epoch)
                writer.add_scalar('val/_mae', epoch_val_mae, epoch)
                writer.add_scalar('test/_mae', epoch_test_mae, epoch)
                writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], epoch)

                t.set_postfix(time=time.time()-start, lr=optimizer.param_groups[0]['lr'],
                              train_loss=epoch_train_loss, val_loss=epoch_val_loss,
                              train_MAE=epoch_train_mae, val_MAE=epoch_val_mae,
                              test_MAE=epoch_test_mae)

                per_epoch_time.append(time.time()-start)

                # Saving checkpoint
                ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")
                os.makedirs(ckpt_dir, exist_ok=True)
                torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Keep only the two most recent checkpoints (epoch-1 and epoch).
                files = glob.glob(ckpt_dir + '/*.pkl')
                for file in files:
                    epoch_nb = file.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch-1:
                        os.remove(file)

                scheduler.step(epoch_val_loss)

                if optimizer.param_groups[0]['lr'] < params['min_lr']:
                    print("\n!! LR EQUAL TO MIN LR SET.")
                    break

                # Stop training after params['max_time'] hours
                if time.time()-start0 > params['max_time']*3600:
                    print('-' * 89)
                    print("Max_time for training elapsed {:.2f} hours, so stopping".format(params['max_time']))
                    break

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early because of KeyboardInterrupt')

    if not epoch_val_MAEs:
        # Interrupted during the first epoch, or run with zero epochs: there
        # is no metric history, so min() below would raise on an empty list.
        print('No epoch was completed; skipping the final report.')
        writer.close()
        return

    #Return test and train metrics at best val metric
    index = epoch_val_MAEs.index(min(epoch_val_MAEs))

    test_mae = epoch_test_MAEs[index]
    train_mae = epoch_train_MAEs[index]

    print("Test MAE: {:.4f}".format(test_mae))
    print("Train MAE: {:.4f}".format(train_mae))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time()-start0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))

    writer.close()

    # Write the results in out_dir/results folder
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
FINAL RESULTS\nTEST MAE: {:.4f}\nTRAIN MAE: {:.4f}\n\n
Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_mae, train_mae, epoch, (time.time()-start0)/3600, np.mean(per_epoch_time)))
def main():
    """
    Command-line entry point for the ZINC graph-regression script.

    Reads the JSON file given by --config, layers any command-line
    overrides on top of it, copies the atom/bond vocabulary sizes from the
    dataset into the network settings, builds the output locations, then
    calls train_val_pipeline.

    Every override flag is optional: a flag that is not given leaves the
    value from the config file untouched. Boolean flags are passed as the
    literal string 'True'; any other string is read as False.

    Exits with status 1 if the effective 'LPE' setting is not one of
    'node', 'edge' or 'none'.
    """
    def as_bool(text):
        # Only the exact string 'True' switches a flag on.
        return text == 'True'

    # (flag name, converter) tables. The flag name doubles as the key that
    # is overwritten in config['params'] / config['net_params'].
    param_overrides = [
        ('seed', int), ('epochs', int), ('batch_size', int),
        ('init_lr', float), ('lr_reduce_factor', float),
        ('lr_schedule_patience', int), ('min_lr', float),
        ('weight_decay', float), ('print_epoch_interval', int),
        ('max_time', float),
    ]
    net_overrides = [
        ('full_graph', as_bool), ('gamma', float), ('m', int), ('LPE', str),
        ('LPE_layers', int), ('LPE_dim', int), ('LPE_n_heads', int),
        ('GT_layers', int), ('GT_hidden_dim', int), ('GT_out_dim', int),
        ('GT_n_heads', int), ('residual', as_bool), ('readout', str),
        ('in_feat_dropout', float), ('dropout', float),
        ('layer_norm', as_bool), ('batch_norm', as_bool),
    ]

    parser = argparse.ArgumentParser()
    # The config file is read unconditionally below, so let argparse report
    # a missing --config instead of failing later inside open().
    parser.add_argument('--config', required=True, help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    parser.add_argument('--out_dir', help="Please give a value for out_dir")
    for name, _ in param_overrides + net_overrides:
        parser.add_argument('--' + name, help="Please give a value for " + name)
    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)

    # device: passing --gpu_id also forces GPU use on
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # model, dataset, out_dir
    MODEL_NAME = args.model if args.model is not None else config['model']
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = LoadData(DATASET_NAME)
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    for name, convert in param_overrides:
        raw = getattr(args, name)
        if raw is not None:
            params[name] = convert(raw)

    # model parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']
    for name, convert in net_overrides:
        raw = getattr(args, name)
        if raw is not None:
            net_params[name] = convert(raw)

    # Validate the effective encoding type (from the command line or the config).
    if net_params['LPE'] not in ['node', 'edge', 'none']:
        print('[!] User did not provide a valid input argument for \'LPE\'. Valid inputs are \'node\', \'edge\', and \'none\'.')
        # Non-zero status so calling scripts can detect the failure.
        raise SystemExit(1)

    # ZINC
    net_params['num_atom_type'] = dataset.num_atom_type
    net_params['num_bond_type'] = dataset.num_bond_type

    #If using full graph, need to add a possible edge type (fake edge)
    if net_params['full_graph']:
        net_params['num_bond_type'] += 1

    # Take the timestamp once so all four output names share the same tag.
    stamp = time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    run_tag = MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id']) + "_" + stamp
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs)
# Script entry point: main() is called unconditionally at module level
# (no `if __name__ == "__main__"` guard), so importing this file also runs it.
main()
================================================
FILE: main_molhiv.py
================================================
"""
IMPORTING LIBS
"""
import dgl
import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from tqdm import tqdm
"""
IMPORTING CUSTOM MODULES/METHODS
"""
from nets.molhiv_graph_regression.load_net import gnn_model
from data.data import LoadData
"""
GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """
    Pin the process to a single CUDA device id and choose the torch device.

    Writes CUDA_DEVICE_ORDER and CUDA_VISIBLE_DEVICES into os.environ.
    Returns torch.device("cuda") only if CUDA is available and use_gpu is
    truthy; in every other case returns torch.device("cpu").
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(gpu_id)

    has_cuda = torch.cuda.is_available()
    run_on_gpu = has_cuda and use_gpu

    if run_on_gpu:
        print('cuda available with GPU:',torch.cuda.get_device_name(0))
        chosen = "cuda"
    else:
        # Printed for the "GPU disabled" case as well as the "no CUDA" case.
        print('cuda not available')
        chosen = "cpu"
    return torch.device(chosen)
"""
VIEWING ENCODING TYPE AND NUM PARAMS
"""
def view_model_param(LPE, net_params):
    """
    Instantiate the network with gnn_model(LPE, net_params), print which
    encoding type is in use alongside the parameter count, and return the
    count.
    """
    probe = gnn_model(LPE, net_params)
    print("MODEL DETAILS:\n")

    # Sum of element counts over all parameter tensors.
    total_param = sum(np.prod(list(tensor.data.size())) for tensor in probe.parameters())

    if LPE == 'node':
        label = 'Node Laplace Encoding'
    elif LPE == 'edge':
        label = 'Edge Laplace Encoding/'
    else:
        label = 'None'
    print('Encoding Type/Total parameters:', label, total_param)

    return total_param
"""
TRAINING CODE
"""
def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
    """
    Train a molhiv model, evaluating on the validation and test splits
    after every epoch, and write a summary of the run.

    Args:
        MODEL_NAME: name used only in the written config/result files.
        dataset: object exposing .name, .train/.val/.test, .collate and the
            preprocessing hooks _laplace_decomp, _make_full_graph and
            _add_edge_laplace_feats.
        params: optimisation settings; reads 'seed', 'init_lr',
            'weight_decay', 'lr_reduce_factor', 'lr_schedule_patience',
            'batch_size', 'epochs', 'min_lr' and 'max_time' (hours).
        net_params: model settings; reads 'LPE', 'm', 'full_graph' and
            'device', and gets 'total_param' added in place.
        dirs: tuple (root_log_dir, root_ckpt_dir, write_file_name,
            write_config_file).

    Training stops when the epoch budget is used up, the learning rate
    drops below params['min_lr'], params['max_time'] hours have elapsed, or
    the user presses Ctrl+C. The reported test/train AUC are those of the
    epoch with the highest validation AUC. If no epoch finished at all, the
    function closes the TensorBoard writer and returns without writing a
    results file.
    """
    t0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    # NOTE(review): the three preprocessing steps below are treated as
    # independent checks -- confirm against the original indentation.
    if net_params['LPE'] in ['edge', 'node']:
        st = time.time()
        print("[!] Computing Laplace Decompositions..")
        dataset._laplace_decomp(net_params['m'])
        print('Time LapPE:',time.time()-st)

    if net_params['full_graph']:
        st = time.time()
        print("[!] Adding full graph connectivity..")
        dataset._make_full_graph()
        print('Time taken to convert to full graphs:',time.time()-st)

    if net_params['LPE'] == 'edge':
        st = time.time()
        print("[!] Computing edge Laplace features..")
        dataset._add_edge_laplace_feats()
        print('Time taken to compute edge Laplace features: ',time.time()-st)

    net_params['total_param'] = view_model_param(net_params['LPE'], net_params)

    trainset, valset, testset = dataset.train, dataset.val, dataset.test

    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs
    device = net_params['device']

    # Write the network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n""" .format(DATASET_NAME, MODEL_NAME, params, net_params, net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))

    model = gnn_model(net_params['LPE'], net_params)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=params['init_lr'], weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                     factor=params['lr_reduce_factor'],
                                                     patience=params['lr_schedule_patience'],
                                                     verbose=True)

    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_AUCs, epoch_val_AUCs, epoch_test_AUCs = [], [], []

    from train.train_molhiv import train_epoch, evaluate_network

    train_loader = DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(valset, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)
    test_loader = DataLoader(testset, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)

    # Last learning rate seen, used to announce each drop exactly once.
    prev_lr = params['init_lr']

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs'])) as t:
            for epoch in t:

                t.set_description('Epoch %d' % epoch)

                start = time.time()

                epoch_train_loss, epoch_train_auc, optimizer = train_epoch(model, optimizer, device, train_loader, epoch, net_params['LPE'])

                epoch_val_loss, epoch_val_auc = evaluate_network(model, device, val_loader, epoch, net_params['LPE'])
                _, epoch_test_auc = evaluate_network(model, device, test_loader, epoch, net_params['LPE'])

                epoch_train_losses.append(epoch_train_loss)
                epoch_val_losses.append(epoch_val_loss)
                epoch_train_AUCs.append(epoch_train_auc)
                epoch_val_AUCs.append(epoch_val_auc)
                epoch_test_AUCs.append(epoch_test_auc)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_auc', epoch_train_auc, epoch)
                writer.add_scalar('val/_auc', epoch_val_auc, epoch)
                writer.add_scalar('test/_auc', epoch_test_auc, epoch)
                writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], epoch)

                t.set_postfix(time=time.time()-start, lr=optimizer.param_groups[0]['lr'],
                              train_loss=epoch_train_loss, val_loss=epoch_val_loss,
                              train_AUC=epoch_train_auc, val_AUC=epoch_val_auc,
                              test_AUC=epoch_test_auc)

                per_epoch_time.append(time.time()-start)

                # Saving checkpoint
                ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")
                os.makedirs(ckpt_dir, exist_ok=True)
                torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Keep only the two most recent checkpoints (epoch-1 and epoch).
                files = glob.glob(ckpt_dir + '/*.pkl')
                for file in files:
                    epoch_nb = file.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch-1:
                        os.remove(file)

                scheduler.step(epoch_val_loss)

                current_lr = optimizer.param_groups[0]['lr']
                if current_lr < prev_lr:
                    print(f"Learning rate dropped to {current_lr}")
                    prev_lr = current_lr

                if current_lr < params['min_lr']:
                    print("\n!! LR EQUAL TO MIN LR SET.")
                    break

                # Stop training after params['max_time'] hours
                if time.time()-t0 > params['max_time']*3600:
                    print('-' * 89)
                    print("Max_time for training elapsed {:.2f} hours, so stopping".format(params['max_time']))
                    break

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early because of KeyboardInterrupt')

    if not epoch_val_AUCs:
        # Interrupted during the first epoch, or run with zero epochs: there
        # is no metric history, so max() below would raise on an empty list.
        print('No epoch was completed; skipping the final report.')
        writer.close()
        return

    #Return test and train metrics at best val metric
    index = epoch_val_AUCs.index(max(epoch_val_AUCs))

    test_auc = epoch_test_AUCs[index]
    train_auc = epoch_train_AUCs[index]

    print("Test AUC: {:.4f}".format(test_auc))
    print("Train AUC: {:.4f}".format(train_auc))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time()-t0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))

    writer.close()

    # Write the results in out_dir/results folder
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
FINAL RESULTS\nTEST AUC: {:.4f}\nTRAIN AUC: {:.4f}\n\n
Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_auc, train_auc, epoch, (time.time()-t0)/3600, np.mean(per_epoch_time)))
def main():
    """
    Command-line entry point for the molhiv script.

    Reads the JSON file given by --config, layers any command-line
    overrides on top of it, builds the output locations, then calls
    train_val_pipeline.

    Every override flag is optional: a flag that is not given leaves the
    value from the config file untouched. Boolean flags are passed as the
    literal string 'True'; any other string is read as False.

    Exits with status 1 if the effective 'LPE' setting is not one of
    'node', 'edge' or 'none'.
    """
    def as_bool(text):
        # Only the exact string 'True' switches a flag on.
        return text == 'True'

    # (flag name, converter) tables. The flag name doubles as the key that
    # is overwritten in config['params'] / config['net_params'].
    param_overrides = [
        ('seed', int), ('epochs', int), ('batch_size', int),
        ('init_lr', float), ('lr_reduce_factor', float),
        ('lr_schedule_patience', int), ('min_lr', float),
        ('weight_decay', float), ('print_epoch_interval', int),
        ('max_time', float),
    ]
    net_overrides = [
        ('full_graph', as_bool), ('gamma', float), ('m', int), ('LPE', str),
        ('LPE_layers', int), ('LPE_dim', int), ('LPE_n_heads', int),
        ('GT_layers', int), ('GT_hidden_dim', int), ('GT_out_dim', int),
        ('GT_n_heads', int), ('residual', as_bool), ('readout', str),
        ('in_feat_dropout', float), ('dropout', float),
        ('layer_norm', as_bool), ('batch_norm', as_bool),
    ]

    parser = argparse.ArgumentParser()
    # The config file is read unconditionally below, so let argparse report
    # a missing --config instead of failing later inside open().
    parser.add_argument('--config', required=True, help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    parser.add_argument('--out_dir', help="Please give a value for out_dir")
    for name, _ in param_overrides + net_overrides:
        parser.add_argument('--' + name, help="Please give a value for " + name)
    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)

    # device: passing --gpu_id also forces GPU use on
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # model, dataset, out_dir
    MODEL_NAME = args.model if args.model is not None else config['model']
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = LoadData(DATASET_NAME)
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    for name, convert in param_overrides:
        raw = getattr(args, name)
        if raw is not None:
            params[name] = convert(raw)

    # model parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']
    for name, convert in net_overrides:
        raw = getattr(args, name)
        if raw is not None:
            net_params[name] = convert(raw)

    # Validate the effective encoding type (from the command line or the config).
    if net_params['LPE'] not in ['node', 'edge', 'none']:
        print('[!] User did not provide a valid input argument for \'LPE\'. Valid inputs are \'node\', \'edge\', and \'none\'.')
        # Non-zero status so calling scripts can detect the failure.
        raise SystemExit(1)

    # Take the timestamp once so all four output names share the same tag.
    stamp = time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    run_tag = MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id']) + "_" + stamp
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs)
# Script entry point: main() is called unconditionally at module level
# (no `if __name__ == "__main__"` guard), so importing this file also runs it.
main()
================================================
FILE: main_molpcba.py
================================================
"""
IMPORTING LIBS
"""
import dgl
import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from tqdm import tqdm
"""
IMPORTING CUSTOM MODULES/METHODS
"""
from nets.molpcba.load_net import gnn_model
from data.data import LoadData
torch.set_default_dtype(torch.float32)
"""
GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """Pin the visible CUDA device and return the torch device to run on.

    Args:
        use_gpu: truthy when the caller wants to run on a GPU.
        gpu_id: id written (as a string) to ``CUDA_VISIBLE_DEVICES``.

    Returns:
        ``torch.device("cuda")`` when CUDA is available and ``use_gpu`` is
        truthy, otherwise ``torch.device("cpu")``.
    """
    # Environment is always updated, even when the CPU ends up being used.
    os.environ.update({
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": str(gpu_id),
    })

    wants_cuda = torch.cuda.is_available() and use_gpu
    if not wants_cuda:
        print('cuda not available')
        return torch.device("cpu")

    print('cuda available with GPU:', torch.cuda.get_device_name(0))
    return torch.device("cuda")
"""
VIEWING ENCODING TYPE AND NUM PARAMS
"""
def view_model_param(LPE, net_params):
    """Instantiate the model once, print its encoding type and parameter count.

    Args:
        LPE: encoding selector; 'edge' and 'node' get a dedicated label,
            anything else is reported as 'None'.
        net_params: network hyper-parameters forwarded to ``gnn_model``.

    Returns:
        The summed number of elements over ``model.parameters()``.
    """
    network = gnn_model(LPE, net_params)
    print("MODEL DETAILS:\n")

    # Start from the int 0 so a parameter-less model still yields 0.
    n_weights = sum((np.prod(list(p.data.size())) for p in network.parameters()), 0)

    encoding_labels = {
        'edge': 'Edge Laplace Encoding/',
        'node': 'Node Laplace Encoding',
    }
    print('Encoding Type/Total parameters:', encoding_labels.get(LPE, 'None'), n_weights)
    return n_weights
"""
TRAINING CODE
"""
def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
    """Train, validate and test a model on ``dataset`` and write logs/results.

    Steps, in order: optional dataset preprocessing (Laplace decomposition,
    full-graph connectivity, edge Laplace features) driven by ``net_params``;
    dump of the configuration; seeding; the epoch loop with TensorBoard
    logging, checkpointing and LR scheduling on the validation AP; and a final
    summary reported at the epoch with the best validation AP.

    Args:
        MODEL_NAME: model name, only used in the written reports.
        dataset: object exposing ``name``, ``train``, ``val``, ``test``,
            ``collate`` and the ``_laplace_decomp`` / ``_make_full_graph`` /
            ``_add_edge_laplace_feats`` preprocessing hooks.
        params: optimisation settings (seed, epochs, batch_size, init_lr,
            weight_decay, lr_reduce_factor, lr_schedule_patience, min_lr,
            max_time, batch_accumulation).
        net_params: network settings; ``total_param`` is added to it here.
        dirs: ``(root_log_dir, root_ckpt_dir, write_file_name, write_config_file)``.

    Returns:
        None. If no epoch completes, the summary and result file are skipped.
    """
    t0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    if net_params['LPE'] in ['edge', 'node']:
        st = time.time()
        print("[!] Computing Laplace Decompositions..")
        dataset._laplace_decomp(net_params['m'])
        print('Time LapPE:',time.time()-st)

    if net_params['full_graph']:
        st = time.time()
        print("[!] Adding full graph connectivity..")
        dataset._make_full_graph()
        print('Time taken to convert to full graphs:',time.time()-st)

    if net_params['LPE'] == 'edge':
        st = time.time()
        print("[!] Computing edge Laplace features..")
        dataset._add_edge_laplace_feats()
        print('Time taken to compute edge Laplace features: ',time.time()-st)

    net_params['total_param'] = view_model_param(net_params['LPE'], net_params)

    trainset, valset, testset = dataset.train, dataset.val, dataset.test

    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs
    device = net_params['device']

    # Write the network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n""" .format(DATASET_NAME, MODEL_NAME, params, net_params, net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))

    model = gnn_model(net_params['LPE'], net_params)
    model = model.to(device=device)

    optimizer = optim.Adam(model.parameters(), lr=params['init_lr'], weight_decay=params['weight_decay'])
    # mode='min' because the scheduler is stepped with the *negated* validation AP below.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                     factor=params['lr_reduce_factor'],
                                                     patience=params['lr_schedule_patience'],
                                                     verbose=True)

    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_APs, epoch_val_APs, epoch_test_APs = [], [], []

    from train.train_molpcba import train_epoch, evaluate_network

    train_loader = DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(valset, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)
    test_loader = DataLoader(testset, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs'])) as t:
            for epoch in t:

                t.set_description('Epoch %d' % epoch)

                start = time.time()

                epoch_train_loss, epoch_train_ap, optimizer = train_epoch(model, optimizer, device, train_loader, epoch, net_params['LPE'], params["batch_accumulation"])

                epoch_val_loss, epoch_val_ap = evaluate_network(model, device, val_loader, epoch, net_params['LPE'])
                _, epoch_test_ap = evaluate_network(model, device, test_loader, epoch, net_params['LPE'])

                epoch_train_losses.append(epoch_train_loss)
                epoch_val_losses.append(epoch_val_loss)
                epoch_train_APs.append(epoch_train_ap)
                epoch_val_APs.append(epoch_val_ap)
                epoch_test_APs.append(epoch_test_ap)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_AP', epoch_train_ap, epoch)
                writer.add_scalar('val/_AP', epoch_val_ap, epoch)
                writer.add_scalar('test/_AP', epoch_test_ap, epoch)
                writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], epoch)

                t.set_postfix(time=time.time()-start, lr=optimizer.param_groups[0]['lr'],
                              train_loss=epoch_train_loss, val_loss=epoch_val_loss,
                              train_AP=epoch_train_ap, val_AP=epoch_val_ap,
                              test_AP=epoch_test_ap)

                per_epoch_time.append(time.time()-start)

                # Saving checkpoint
                ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")
                os.makedirs(ckpt_dir, exist_ok=True)
                torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Keep only the two most recent checkpoints; the epoch number
                # is parsed back from the "epoch_<n>.pkl" file name.
                files = glob.glob(ckpt_dir + '/*.pkl')
                for file in files:
                    epoch_nb = file.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch-1:
                        os.remove(file)

                scheduler.step(-epoch_val_ap)

                if optimizer.param_groups[0]['lr'] < params['min_lr']:
                    print("\n!! LR EQUAL TO MIN LR SET.")
                    break

                # Stop training after params['max_time'] hours
                if time.time()-t0 > params['max_time']*3600:
                    print('-' * 89)
                    print("Max_time for training elapsed {:.2f} hours, so stopping".format(params['max_time']))
                    break

    except Exception as e:  # Sometimes there's out of memory error after many epochs
        print('-' * 89)
        print(f'Exiting from training early Exception: {e}')
    except KeyboardInterrupt:  # User pressed Ctrl + C; fall through to the summary
        print('-' * 89)
        print(f'Exiting from training keyboard interrupt')

    # Nothing to summarise if training stopped before the first epoch finished
    # (or epochs == 0): max() of an empty list would raise and `epoch` may be
    # unbound, which would hide the message printed above.
    if not epoch_val_APs:
        print('-' * 89)
        print("No epoch completed; skipping final results.")
        writer.close()
        return

    #Return test and train metrics at best val metric
    index = epoch_val_APs.index(max(epoch_val_APs))

    test_ap = epoch_test_APs[index]
    val_ap = epoch_val_APs[index]
    train_ap = epoch_train_APs[index]

    print("Test AP: {:.4f}".format(test_ap))
    print("Val AP: {:.4f}".format(val_ap))
    print("Train AP: {:.4f}".format(train_ap))
    print("Best epoch index: {:.4f}".format(index))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time()-t0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))

    writer.close()

    """
        Write the results in out_dir/results folder
    """
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
FINAL RESULTS\nTEST AP: {:.4f}\nTRAIN AP: {:.4f}\n\n
Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_ap, train_ap, epoch, (time.time()-t0)/3600, np.mean(per_epoch_time)))
def main():
    """
        USER CONTROLS

    Parse the command line, load the JSON config given by ``--config``,
    overlay every option that was passed explicitly on top of the config
    values, build the output paths and call ``train_val_pipeline``.

    All options are read as strings; the tables below define how each one is
    converted before it replaces the config value. Boolean options are true
    only for the exact string 'True'.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default="configs/MOLPCBA/optimized", help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    # Every remaining option uses the same help template; generating it from
    # the option name keeps the text in sync with the flag.
    for option in (
        'out_dir', 'seed', 'epochs', 'batch_size', 'init_lr', 'lr_reduce_factor',
        'lr_schedule_patience', 'min_lr', 'weight_decay', 'print_epoch_interval', 'max_time',
        # Model details
        'full_graph', 'gamma', 'm', 'LPE', 'LPE_layers', 'LPE_dim', 'LPE_n_heads', 'extra_mlp',
        'GT_layers', 'GT_hidden_dim', 'GT_out_dim', 'GT_n_heads', 'residual', 'readout',
        'in_feat_dropout', 'dropout', 'layer_norm', 'batch_norm',
    ):
        parser.add_argument('--' + option, help="Please give a value for " + option)

    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)

    def as_bool(text):
        # Only the literal string 'True' enables a flag.
        return text == 'True'

    def keep(text):
        # Option is stored exactly as typed.
        return text

    def override(target, casts):
        # Copy each explicitly provided CLI value into `target`, converted.
        for key, cast in casts:
            raw = getattr(args, key)
            if raw is not None:
                target[key] = cast(raw)

    # device
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # model, dataset, out_dir
    MODEL_NAME = args.model if args.model is not None else config['model']
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = LoadData(DATASET_NAME)
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    override(params, (
        ('seed', int),
        ('epochs', int),
        ('batch_size', int),
        ('init_lr', float),
        ('lr_reduce_factor', float),
        ('lr_schedule_patience', int),
        ('min_lr', float),
        ('weight_decay', float),
        ('print_epoch_interval', int),
        ('max_time', float),
    ))

    # model parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']
    override(net_params, (
        ('full_graph', as_bool),
        ('gamma', float),
        ('m', int),
        ('LPE', keep),
        # NOTE(review): stored as the raw string, unlike the other switches;
        # if the model treats it as a boolean, '--extra_mlp False' is truthy.
        ('extra_mlp', keep),
    ))

    if net_params['LPE'] not in ['node', 'edge', 'none']:
        print('[!] User did not provide a valid input argument for \'LPE\'. Valid inputs are \'node\', \'edge\', and \'none\'.')
        exit()

    override(net_params, (
        ('LPE_layers', int),
        ('LPE_dim', int),
        ('LPE_n_heads', int),
        ('GT_layers', int),
        ('GT_hidden_dim', int),
        ('GT_out_dim', int),
        ('GT_n_heads', int),
        ('residual', as_bool),
        ('readout', keep),
        ('in_feat_dropout', float),
        ('dropout', float),
        ('layer_norm', as_bool),
        ('batch_norm', as_bool),
    ))

    # Take the timestamp once so the log, checkpoint, result and config names
    # of one run always share the same suffix.
    run_tag = MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs)
main()
================================================
FILE: misc/download_datasets.md
================================================
# Download datasets
All the datasets work with DGL 0.5.x or later. Please update the environment using the yml files in the root directory if using these datasets throws error(s).
<br>
## 1. ZINC molecular dataset
ZINC size is 58.9MB.
```
# At the root of the project
cd data/
bash script_download_molecules.sh
```
Script [script_download_molecules.sh](../data/script_download_molecules.sh) is located here. Refer to [benchmarking-gnns repo](https://github.com/graphdeeplearning/benchmarking-gnns) for details on preparation.
<br>
## 2. PATTERN/CLUSTER SBM datasets
PATTERN size is 1.98GB and CLUSTER size is 1.26GB.
```
# At the root of the project
cd data/
bash script_download_SBMs.sh
```
Script [script_download_SBMs.sh](../data/script_download_SBMs.sh) is located here. Refer to [benchmarking-gnns repo](https://github.com/graphdeeplearning/benchmarking-gnns) for details on preparation.
<br>
## 3. All BGNN datasets
```
# At the root of the project
cd data/
bash script_download_all_datasets.sh
```
Script [script_download_all_datasets.sh](../data/script_download_all_datasets.sh) is located here.
<br>
## 4. MolHIV OGB dataset
```
# Ensure OGB is installed:
pip install ogb
```
If OGB is properly installed, the dataset will be downloaded automatically and saved to the `dataset/` folder when a MolHIV experiment is run.
<br><br><br>
================================================
FILE: misc/env_installation.md
================================================
# Benchmark installation
<br>
## 1. Setup Conda
```
# Conda installation
# For Linux
curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
# For OSX
curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
chmod +x ~/miniconda.sh
./miniconda.sh
source ~/.bashrc # For Linux
source ~/.bash_profile # For OSX
```
<br>
## 2. Setup Python environment for CPU
```
# Clone GitHub repo
conda install git
git clone https://github.com/DevinKreuzer/SAN.git
cd SAN
# Install python environment
# using pip
pip install -r requirements.txt
# using Conda
conda create --name <env_name> --file requirements.txt
```
<br>
## 3. Setup Python environment for GPU
DGL 0.5.x requires CUDA **10.2**.
For Ubuntu **18.04**
```
# Setup CUDA 10.2 on Ubuntu 18.04
sudo apt-get --purge remove "*cublas*" "cuda*"
sudo apt --purge remove "nvidia*"
sudo apt autoremove
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.2.89-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu1804_10.2.89-1_amd64.deb
sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
sudo apt update
sudo apt install -y cuda-10-2
sudo reboot
cat /usr/local/cuda/version.txt # Check CUDA version is 10.2
# Clone GitHub repo
conda install git
git clone https://github.com/DevinKreuzer/SAN.git
cd SAN
# Install python environment
# using pip
pip install -r requirements.txt
# using Conda
conda create --name <env_name> --file requirements.txt
```
<br><br><br>
================================================
FILE: nets/SBMs_node_classification/SAN.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import numpy as np
"""
Graph Transformer
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout
class SAN(nn.Module):
    """Graph transformer for SBM node classification without Laplace encodings.

    Integer node and edge features are embedded, refined by a stack of
    ``GraphTransformerLayer`` modules, and the final node states are mapped
    to ``n_classes`` scores per node by an ``MLPReadout``.
    """

    def __init__(self, net_params):
        """Build the network from the ``net_params`` dictionary.

        Keys read: in_dim, n_classes, full_graph, gamma, GT_layers,
        GT_hidden_dim, GT_out_dim, GT_n_heads, residual, readout,
        in_feat_dropout, dropout, layer_norm, batch_norm, device.
        """
        super().__init__()

        in_dim_node = net_params['in_dim']  # node_dim (feat is an integer)
        self.n_classes = net_params['n_classes']

        full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        GT_layers = net_params['GT_layers']
        GT_hidden_dim = net_params['GT_hidden_dim']
        GT_out_dim = net_params['GT_out_dim']
        GT_n_heads = net_params['GT_n_heads']

        self.residual = net_params['residual']
        self.readout = net_params['readout']
        in_feat_dropout = net_params['in_feat_dropout']
        dropout = net_params['dropout']

        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']

        self.device = net_params['device']
        self.in_feat_dropout = nn.Dropout(in_feat_dropout)

        self.embedding_h = nn.Embedding(in_dim_node, GT_hidden_dim)
        self.embedding_e = nn.Embedding(2, GT_hidden_dim)

        # GT_layers-1 hidden->hidden layers followed by one hidden->out layer.
        self.layers = nn.ModuleList([ GraphTransformerLayer(gamma, GT_hidden_dim, GT_hidden_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual) for _ in range(GT_layers-1) ])
        self.layers.append(GraphTransformerLayer(gamma, GT_hidden_dim, GT_out_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual))

        self.MLP_layer = MLPReadout(GT_out_dim, self.n_classes)

    def forward(self, g, h, e):
        """Return per-node class scores for graph ``g``.

        ``h`` and ``e`` are the integer node / edge features fed to the
        embedding tables.
        """
        # input embedding
        h = self.embedding_h(h)
        h = self.in_feat_dropout(h)
        e = self.embedding_e(e)

        # GraphTransformer Layers
        for conv in self.layers:
            h, e = conv(g, h, e)

        # output
        h_out = self.MLP_layer(h)

        return h_out

    def loss(self, pred, label):
        """Class-balanced cross-entropy between ``pred`` and ``label``.

        Each class ``c`` present in ``label`` gets weight ``(V - n_c) / V``
        where ``V`` is the number of labels and ``n_c`` the count of class
        ``c``; classes absent from ``label`` get weight 0.
        """
        # calculating label weights for weighted loss computation
        V = label.size(0)
        # minlength pads absent classes with a zero count, so no scatter
        # through nonzero()/unique() is needed and the counts live on the
        # same device as the labels.
        cluster_sizes = torch.bincount(label, minlength=self.n_classes)
        weight = (V - cluster_sizes).float() / V
        weight *= (cluster_sizes>0).float()

        # weighted cross-entropy for unbalanced classes
        criterion = nn.CrossEntropyLoss(weight=weight)
        loss = criterion(pred, label)

        return loss
================================================
FILE: nets/SBMs_node_classification/SAN_EdgeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import numpy as np
"""
Graph Transformer
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout
class SAN_EdgeLPE(nn.Module):
    """Graph transformer for SBM node classification with edge-level learned
    Laplace positional encodings.

    A small ``nn.TransformerEncoder`` turns the per-edge
    (diff, product, eigenvalue) triples into an ``LPE_dim`` vector that is
    concatenated to the edge embedding before the graph transformer layers.
    """

    def __init__(self, net_params):
        """Build the network from the ``net_params`` dictionary.

        Keys read: in_dim, n_classes, full_graph, gamma, LPE_layers, LPE_dim,
        LPE_n_heads, GT_layers, GT_hidden_dim, GT_out_dim, GT_n_heads,
        residual, readout, in_feat_dropout, dropout, layer_norm, batch_norm,
        device.
        """
        super().__init__()

        in_dim_node = net_params['in_dim']  # node_dim (feat is an integer)
        self.n_classes = net_params['n_classes']

        full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        LPE_layers = net_params['LPE_layers']
        LPE_dim = net_params['LPE_dim']
        LPE_n_heads = net_params['LPE_n_heads']

        GT_layers = net_params['GT_layers']
        GT_hidden_dim = net_params['GT_hidden_dim']
        GT_out_dim = net_params['GT_out_dim']
        GT_n_heads = net_params['GT_n_heads']

        self.residual = net_params['residual']
        self.readout = net_params['readout']
        in_feat_dropout = net_params['in_feat_dropout']
        dropout = net_params['dropout']

        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']

        self.device = net_params['device']
        self.in_feat_dropout = nn.Dropout(in_feat_dropout)

        self.embedding_h = nn.Embedding(in_dim_node, GT_hidden_dim)
        # Remove some embedding dimensions to make room for concatenating laplace encoding
        self.embedding_e = nn.Embedding(2, GT_hidden_dim-LPE_dim)
        self.linear_A = nn.Linear(3, LPE_dim)

        encoder_layer = nn.TransformerEncoderLayer(d_model=LPE_dim, nhead=LPE_n_heads)
        self.PE_Transformer = nn.TransformerEncoder(encoder_layer, num_layers=LPE_layers)

        # GT_layers-1 hidden->hidden layers followed by one hidden->out layer.
        self.layers = nn.ModuleList([ GraphTransformerLayer(gamma, GT_hidden_dim, GT_hidden_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual) for _ in range(GT_layers-1) ])
        self.layers.append(GraphTransformerLayer(gamma, GT_hidden_dim, GT_out_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual))

        self.MLP_layer = MLPReadout(GT_out_dim, self.n_classes)

    def forward(self, g, h, e, diff, product, EigVals):
        """Return per-node class scores for graph ``g``.

        ``diff``, ``product`` and ``EigVals`` are concatenated along dim 2
        into the raw edge positional encoding; NaN entries mark eigenvectors
        that do not exist for an edge and are masked out.
        """
        # input embedding
        h = self.embedding_h(h)
        h = self.in_feat_dropout(h)
        e = self.embedding_e(e)

        PosEnc = torch.cat((diff, product, EigVals), 2)  # (Num edges) x (Num Eigenvectors) x 3
        empty_mask = torch.isnan(PosEnc)  # (Num edges) x (Num Eigenvectors) x 3

        PosEnc[empty_mask] = 0  # (Num edges) x (Num Eigenvectors) x 3
        PosEnc = torch.transpose(PosEnc, 0 ,1).float()  # (Num Eigenvectors) x (Num edges) x 3
        PosEnc = self.linear_A(PosEnc)  # (Num Eigenvectors) x (Num edges) x PE_dim

        #1st Transformer: Learned PE
        PosEnc = self.PE_Transformer(src=PosEnc, src_key_padding_mask=empty_mask[:,:,0])

        #remove masked sequences
        # Masked positions are set to NaN so that nansum below skips them.
        PosEnc[torch.transpose(empty_mask, 0 ,1)[:,:,0]] = float('nan')

        #Sum pooling
        PosEnc = torch.nansum(PosEnc, 0, keepdim=False)  # (Num edge) x PE_dim

        #Concatenate learned PE to input embedding
        e = torch.cat((e, PosEnc), 1)

        # GraphTransformer Layers
        for conv in self.layers:
            h, e = conv(g, h, e)

        # output
        h_out = self.MLP_layer(h)

        return h_out

    def loss(self, pred, label):
        """Class-balanced cross-entropy between ``pred`` and ``label``.

        Each class ``c`` present in ``label`` gets weight ``(V - n_c) / V``
        where ``V`` is the number of labels and ``n_c`` the count of class
        ``c``; classes absent from ``label`` get weight 0.
        """
        # calculating label weights for weighted loss computation
        V = label.size(0)
        # minlength pads absent classes with a zero count, so no scatter
        # through nonzero()/unique() is needed and the counts live on the
        # same device as the labels.
        cluster_sizes = torch.bincount(label, minlength=self.n_classes)
        weight = (V - cluster_sizes).float() / V
        weight *= (cluster_sizes>0).float()

        # weighted cross-entropy for unbalanced classes
        criterion = nn.CrossEntropyLoss(weight=weight)
        loss = criterion(pred, label)

        return loss
================================================
FILE: nets/SBMs_node_classification/SAN_NodeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import numpy as np
"""
Graph Transformer
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout
class SAN_NodeLPE(nn.Module):
    """Graph transformer for SBM node classification with node-level learned
    Laplace positional encodings.

    A small ``nn.TransformerEncoder`` turns the per-node
    (eigenvector entry, eigenvalue) pairs into an ``LPE_dim`` vector that is
    concatenated to the node embedding before the graph transformer layers.
    """

    def __init__(self, net_params):
        """Build the network from the ``net_params`` dictionary.

        Keys read: in_dim, n_classes, full_graph, gamma, LPE_layers, LPE_dim,
        LPE_n_heads, GT_layers, GT_hidden_dim, GT_out_dim, GT_n_heads,
        residual, readout, in_feat_dropout, dropout, layer_norm, batch_norm,
        device.
        """
        super().__init__()

        in_dim_node = net_params['in_dim']  # node_dim (feat is an integer)
        self.n_classes = net_params['n_classes']

        full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        LPE_layers = net_params['LPE_layers']
        LPE_dim = net_params['LPE_dim']
        LPE_n_heads = net_params['LPE_n_heads']

        GT_layers = net_params['GT_layers']
        GT_hidden_dim = net_params['GT_hidden_dim']
        GT_out_dim = net_params['GT_out_dim']
        GT_n_heads = net_params['GT_n_heads']

        self.residual = net_params['residual']
        self.readout = net_params['readout']
        in_feat_dropout = net_params['in_feat_dropout']
        dropout = net_params['dropout']

        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']

        self.device = net_params['device']
        self.in_feat_dropout = nn.Dropout(in_feat_dropout)

        # Remove some embedding dimensions to make room for concatenating laplace encoding
        self.embedding_h = nn.Embedding(in_dim_node, GT_hidden_dim-LPE_dim)
        self.embedding_e = nn.Embedding(2, GT_hidden_dim)
        self.linear_A = nn.Linear(2, LPE_dim)

        encoder_layer = nn.TransformerEncoderLayer(d_model=LPE_dim, nhead=LPE_n_heads)
        self.PE_Transformer = nn.TransformerEncoder(encoder_layer, num_layers=LPE_layers)

        # GT_layers-1 hidden->hidden layers followed by one hidden->out layer.
        self.layers = nn.ModuleList([ GraphTransformerLayer(gamma, GT_hidden_dim, GT_hidden_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual) for _ in range(GT_layers-1) ])
        self.layers.append(GraphTransformerLayer(gamma, GT_hidden_dim, GT_out_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual))

        self.MLP_layer = MLPReadout(GT_out_dim, self.n_classes)

    def forward(self, g, h, e, EigVecs, EigVals):
        """Return per-node class scores for graph ``g``.

        ``EigVecs`` (unsqueezed on dim 2) and ``EigVals`` are concatenated
        into the raw node positional encoding; NaN entries mark eigenvectors
        that do not exist for a node and are masked out.
        """
        # input embedding
        h = self.embedding_h(h)
        e = self.embedding_e(e)

        PosEnc = torch.cat((EigVecs.unsqueeze(2), EigVals), dim=2).float()  # (Num nodes) x (Num Eigenvectors) x 2
        empty_mask = torch.isnan(PosEnc)  # (Num nodes) x (Num Eigenvectors) x 2

        PosEnc[empty_mask] = 0  # (Num nodes) x (Num Eigenvectors) x 2
        PosEnc = torch.transpose(PosEnc, 0 ,1).float()  # (Num Eigenvectors) x (Num nodes) x 2
        PosEnc = self.linear_A(PosEnc)  # (Num Eigenvectors) x (Num nodes) x PE_dim

        #1st Transformer: Learned PE
        PosEnc = self.PE_Transformer(src=PosEnc, src_key_padding_mask=empty_mask[:,:,0])

        #remove masked sequences
        # Masked positions are set to NaN so that nansum below skips them.
        PosEnc[torch.transpose(empty_mask, 0 ,1)[:,:,0]] = float('nan')

        #Sum pooling
        PosEnc = torch.nansum(PosEnc, 0, keepdim=False)

        #Concatenate learned PE to input embedding
        h = torch.cat((h, PosEnc), 1)

        # Dropout is applied after the concatenation, i.e. to the PE part too.
        h = self.in_feat_dropout(h)

        # GraphTransformer Layers
        for conv in self.layers:
            h, e = conv(g, h, e)

        # output
        h_out = self.MLP_layer(h)

        return h_out

    def loss(self, pred, label):
        """Class-balanced cross-entropy between ``pred`` and ``label``.

        Each class ``c`` present in ``label`` gets weight ``(V - n_c) / V``
        where ``V`` is the number of labels and ``n_c`` the count of class
        ``c``; classes absent from ``label`` get weight 0.
        """
        # calculating label weights for weighted loss computation
        V = label.size(0)
        # minlength pads absent classes with a zero count, so no scatter
        # through nonzero()/unique() is needed and the counts live on the
        # same device as the labels.
        cluster_sizes = torch.bincount(label, minlength=self.n_classes)
        weight = (V - cluster_sizes).float() / V
        weight *= (cluster_sizes>0).float()

        # weighted cross-entropy for unbalanced classes
        criterion = nn.CrossEntropyLoss(weight=weight)
        loss = criterion(pred, label)

        return loss
================================================
FILE: nets/SBMs_node_classification/load_net.py
================================================
"""
Utility file to select GraphNN model as
selected by the user
"""
from nets.SBMs_node_classification.SAN_NodeLPE import SAN_NodeLPE
from nets.SBMs_node_classification.SAN_EdgeLPE import SAN_EdgeLPE
from nets.SBMs_node_classification.SAN import SAN
def NodeLPE(net_params):
    """Construct the SAN_NodeLPE network from ``net_params``."""
    network = SAN_NodeLPE(net_params)
    return network
def EdgeLPE(net_params):
    """Construct the SAN_EdgeLPE network from ``net_params``."""
    network = SAN_EdgeLPE(net_params)
    return network
def NoLPE(net_params):
    """Construct the plain SAN network (no Laplace encoding) from ``net_params``."""
    network = SAN(net_params)
    return network
def gnn_model(LPE, net_params):
    """Build the network selected by ``LPE``.

    ``LPE`` must be one of 'edge', 'node' or 'none'; any other key raises
    ``KeyError`` from the lookup.
    """
    builders = dict(edge=EdgeLPE, node=NodeLPE, none=NoLPE)
    build = builders[LPE]
    return build(net_params)
================================================
FILE: nets/ZINC_graph_regression/SAN.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import numpy as np
"""
Graph Transformer with edge features
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout
class SAN(nn.Module):
    """Graph transformer for ZINC graph regression without Laplace encodings.

    Atom and bond types are embedded, refined by a stack of
    ``GraphTransformerLayer`` modules, pooled over the nodes of each graph
    and mapped to a single regression output by an ``MLPReadout``.
    """

    def __init__(self, net_params):
        """Build the network from the ``net_params`` dictionary."""
        super().__init__()

        n_atom_types = net_params['num_atom_type']
        n_bond_types = net_params['num_bond_type']

        connect_all = net_params['full_graph']
        gamma = net_params['gamma']

        depth = net_params['GT_layers']
        hidden = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        heads = net_params['GT_n_heads']

        self.residual = net_params['residual']
        self.readout = net_params['readout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        p_in = net_params['in_feat_dropout']
        p_drop = net_params['dropout']

        self.in_feat_dropout = nn.Dropout(p_in)

        self.embedding_h = nn.Embedding(n_atom_types, hidden)
        self.embedding_e = nn.Embedding(n_bond_types, hidden)

        # Output width of each layer: depth-1 hidden layers, then the out layer.
        widths = [hidden] * (depth - 1) + [out_dim]
        self.layers = nn.ModuleList(
            GraphTransformerLayer(gamma, hidden, width, heads, connect_all, p_drop,
                                  self.layer_norm, self.batch_norm, self.residual)
            for width in widths
        )

        self.MLP_layer = MLPReadout(out_dim, 1)  # 1 out dim since regression problem

    def forward(self, g, h, e):
        """Return one regression score per graph in the batched graph ``g``."""
        # input embedding
        node_state = self.in_feat_dropout(self.embedding_h(h))
        edge_state = self.embedding_e(e)

        # GNN
        for layer in self.layers:
            node_state, edge_state = layer(g, node_state, edge_state)
        g.ndata['h'] = node_state

        # Pick the pooling function; anything other than "sum"/"max" means mean.
        if self.readout == "sum":
            pool = dgl.sum_nodes
        elif self.readout == "max":
            pool = dgl.max_nodes
        else:
            pool = dgl.mean_nodes
        pooled = pool(g, 'h')

        return self.MLP_layer(pooled)

    def loss(self, scores, targets):
        """Mean absolute error between ``scores`` and ``targets``."""
        return F.l1_loss(scores, targets)
================================================
FILE: nets/ZINC_graph_regression/SAN_EdgeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import numpy as np
"""
Graph Transformer with edge features
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout
class SAN_EdgeLPE(nn.Module):
    """Graph transformer for ZINC graph regression with edge-level learned
    Laplace positional encodings.

    A small ``nn.TransformerEncoder`` turns the per-edge
    (diff, product, eigenvalue) triples into an ``LPE_dim`` vector that is
    concatenated to the bond embedding before the graph transformer layers.
    """

    def __init__(self, net_params):
        """Build the network from the ``net_params`` dictionary."""
        super().__init__()

        n_atom_types = net_params['num_atom_type']
        n_bond_types = net_params['num_bond_type']

        connect_all = net_params['full_graph']
        gamma = net_params['gamma']

        pe_depth = net_params['LPE_layers']
        pe_dim = net_params['LPE_dim']
        pe_heads = net_params['LPE_n_heads']

        depth = net_params['GT_layers']
        hidden = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        heads = net_params['GT_n_heads']

        self.residual = net_params['residual']
        self.readout = net_params['readout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        p_in = net_params['in_feat_dropout']
        p_drop = net_params['dropout']

        self.in_feat_dropout = nn.Dropout(p_in)

        self.embedding_h = nn.Embedding(n_atom_types, hidden)
        # Remove some embedding dimensions to make room for concatenating laplace encoding
        self.embedding_e = nn.Embedding(n_bond_types, hidden - pe_dim)
        self.linear_A = nn.Linear(3, pe_dim)

        pe_block = nn.TransformerEncoderLayer(d_model=pe_dim, nhead=pe_heads)
        self.PE_Transformer = nn.TransformerEncoder(pe_block, num_layers=pe_depth)

        # Output width of each layer: depth-1 hidden layers, then the out layer.
        widths = [hidden] * (depth - 1) + [out_dim]
        self.layers = nn.ModuleList(
            GraphTransformerLayer(gamma, hidden, width, heads, connect_all, p_drop,
                                  self.layer_norm, self.batch_norm, self.residual)
            for width in widths
        )

        self.MLP_layer = MLPReadout(out_dim, 1)  # 1 out dim since regression problem

    def forward(self, g, h, e, diff, product, EigVals):
        """Return one regression score per graph in the batched graph ``g``.

        NaN entries in the concatenated (diff, product, EigVals) tensor mark
        missing eigenvectors; they are zeroed for the linear layer, passed as
        the key padding mask to the encoder and excluded from the sum pooling.
        """
        # input embedding
        node_state = self.in_feat_dropout(self.embedding_h(h))
        edge_state = self.embedding_e(e)

        pos = torch.cat((diff, product, EigVals), 2)   # (Num edges) x (Num Eigenvectors) x 3
        missing = torch.isnan(pos)                      # (Num edges) x (Num Eigenvectors) x 3
        pad_mask = missing[:, :, 0]                     # (Num edges) x (Num Eigenvectors)

        pos[missing] = 0
        pos = torch.transpose(pos, 0, 1).float()        # (Num Eigenvectors) x (Num edges) x 3
        pos = self.linear_A(pos)                        # (Num Eigenvectors) x (Num edges) x PE_dim

        # 1st Transformer: Learned PE
        pos = self.PE_Transformer(src=pos, src_key_padding_mask=pad_mask)

        # remove masked sequences: NaN them so nansum ignores them
        pos[torch.transpose(missing, 0, 1)[:, :, 0]] = float('nan')

        # Sum pooling
        pos = torch.nansum(pos, 0, keepdim=False)       # (Num edge) x PE_dim

        # Concatenate learned PE to input embedding
        edge_state = torch.cat((edge_state, pos), 1)

        # GNN
        for layer in self.layers:
            node_state, edge_state = layer(g, node_state, edge_state)
        g.ndata['h'] = node_state

        # Pick the pooling function; anything other than "sum"/"max" means mean.
        if self.readout == "sum":
            pool = dgl.sum_nodes
        elif self.readout == "max":
            pool = dgl.max_nodes
        else:
            pool = dgl.mean_nodes
        pooled = pool(g, 'h')

        return self.MLP_layer(pooled)

    def loss(self, scores, targets):
        """Mean absolute error between ``scores`` and ``targets``."""
        return F.l1_loss(scores, targets)
================================================
FILE: nets/ZINC_graph_regression/SAN_NodeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
import numpy as np
"""
Graph Transformer with edge features
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout
class SAN_NodeLPE(nn.Module):
    """Graph-regression network that adds a learned positional encoding to nodes.

    Per-node spectral inputs (``EigVecs``, ``EigVals``) are projected by a
    linear layer, passed through a ``nn.TransformerEncoder``, sum-pooled and
    concatenated to the atom embedding. The resulting node/edge features go
    through a stack of ``GraphTransformerLayer`` modules, a graph-level
    readout and an ``MLPReadout`` head producing one value per graph.
    """

    def __init__(self, net_params):
        """Create all sub-modules from the ``net_params`` dictionary.

        Keys read: ``num_atom_type``, ``num_bond_type``, ``full_graph``,
        ``gamma``, ``LPE_layers``, ``LPE_dim``, ``LPE_n_heads``, ``GT_layers``,
        ``GT_hidden_dim``, ``GT_out_dim``, ``GT_n_heads``, ``residual``,
        ``readout``, ``in_feat_dropout``, ``dropout``, ``layer_norm``,
        ``batch_norm`` and ``device``. A missing key raises ``KeyError``.
        """
        super().__init__()

        # Vocabulary sizes for the categorical atom / bond embeddings.
        n_atom_types = net_params['num_atom_type']
        n_bond_types = net_params['num_bond_type']

        # Forwarded untouched to every GraphTransformerLayer.
        use_full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        # Learned-positional-encoding (LPE) transformer hyper-parameters.
        n_lpe_layers = net_params['LPE_layers']
        lpe_dim = net_params['LPE_dim']
        n_lpe_heads = net_params['LPE_n_heads']

        # Main graph-transformer (GT) hyper-parameters.
        n_gt_layers = net_params['GT_layers']
        hidden_dim = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        n_gt_heads = net_params['GT_n_heads']

        self.residual = net_params['residual']
        self.readout = net_params['readout']
        p_in_feat = net_params['in_feat_dropout']
        p_dropout = net_params['dropout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        self.in_feat_dropout = nn.Dropout(p_in_feat)

        # The atom embedding is narrower by lpe_dim so that, once the learned
        # encoding is concatenated, node features are hidden_dim wide.
        self.embedding_h = nn.Embedding(n_atom_types, hidden_dim - lpe_dim)
        self.embedding_e = nn.Embedding(n_bond_types, hidden_dim)

        # Lifts the 2 stacked spectral channels to the LPE model width.
        self.linear_A = nn.Linear(2, lpe_dim)
        lpe_block = nn.TransformerEncoderLayer(d_model=lpe_dim, nhead=n_lpe_heads)
        self.PE_Transformer = nn.TransformerEncoder(lpe_block, num_layers=n_lpe_layers)

        def build_gt_layer(layer_out_dim):
            # Every layer takes hidden_dim inputs; only the output width varies.
            return GraphTransformerLayer(
                gamma,
                hidden_dim,
                layer_out_dim,
                n_gt_heads,
                use_full_graph,
                p_dropout,
                self.layer_norm,
                self.batch_norm,
                self.residual,
            )

        # (GT_layers - 1) hidden -> hidden layers, then one hidden -> out layer.
        layer_widths = [hidden_dim] * (n_gt_layers - 1) + [out_dim]
        self.layers = nn.ModuleList([build_gt_layer(width) for width in layer_widths])

        self.MLP_layer = MLPReadout(out_dim, 1)  # 1 out dim since regression problem

    def forward(self, g, h, e, EigVecs, EigVals):
        """Compute one prediction per graph in ``g``.

        ``h`` and ``e`` are index tensors fed to the atom / bond embeddings.
        ``EigVecs`` gets a trailing axis added and is concatenated with
        ``EigVals`` on dim 2; the original comments give the result as
        (num nodes) x (num eigenvectors) x 2, with NaN marking padded entries.
        Stores the final node features in ``g.ndata['h']`` as a side effect.
        """
        # Input embeddings.
        h = self.embedding_h(h)
        e = self.embedding_e(e)

        # (num nodes) x (num eigenvectors) x 2
        pos_enc = torch.cat((EigVecs.unsqueeze(2), EigVals), dim=2).float()
        nan_mask = torch.isnan(pos_enc)
        # True where an eigenvector slot is padding: (num nodes) x (num eigenvectors)
        padding_mask = nan_mask[:, :, 0]

        # Zero the NaN placeholders so the linear layer sees finite values.
        pos_enc[nan_mask] = 0
        # Sequence-first layout: (num eigenvectors) x (num nodes) x 2
        # (already float32 from the cast above, so no further cast is needed).
        pos_enc = pos_enc.transpose(0, 1)
        # (num eigenvectors) x (num nodes) x LPE_dim
        pos_enc = self.linear_A(pos_enc)

        # 1st Transformer: learned PE, attending only to non-padded eigenvectors.
        pos_enc = self.PE_Transformer(src=pos_enc, src_key_padding_mask=padding_mask)

        # Mark padded slots as NaN so the NaN-aware sum below ignores them.
        pos_enc[padding_mask.transpose(0, 1)] = float('nan')

        # Sum pooling over eigenvectors: (num nodes) x LPE_dim
        pos_enc = pos_enc.nansum(dim=0)

        # Concatenate learned PE to the atom embedding, then apply input dropout.
        h = torch.cat((h, pos_enc), 1)
        h = self.in_feat_dropout(h)

        # GNN
        for gt_layer in self.layers:
            h, e = gt_layer(g, h, e)
        g.ndata['h'] = h

        # Graph-level pooling; anything other than "sum" / "max" means "mean".
        if self.readout == "sum":
            pool_nodes = dgl.sum_nodes
        elif self.readout == "max":
            pool_nodes = dgl.max_nodes
        else:
            pool_nodes = dgl.mean_nodes  # default readout is mean nodes
        hg = pool_nodes(g, 'h')

        return self.MLP_layer(hg)

    def loss(self, scores, targets):
        """Return the mean absolute error (L1) between ``scores`` and ``targets``."""
        criterion = nn.L1Loss()
        return criterion(scores, targets)
================================================
FILE: nets/ZINC_graph_regression/load_net.py
================================================
from nets.ZINC_graph_regression.SAN_NodeLPE import SAN_NodeLPE
from nets.ZINC_graph_regression.SAN_EdgeLPE import SAN_EdgeLPE
from nets.ZINC_graph_regression.SAN import SAN
def NodeLPE(net_params):
    """Build the ZINC model that uses a learned positional encoding on nodes."""
    network = SAN_NodeLPE(net_params)
    return network
def EdgeLPE(net_params):
    """Build the ZINC model that uses a learned positional encoding on edges."""
    network = SAN_EdgeLPE(net_params)
    return network
def NoLPE(net_params):
    """Build the plain ZINC SAN model (no learned positional encoding)."""
    network = SAN(net_params)
    return network
def gnn_model(LPE, net_params):
    """Instantiate the network variant selected by ``LPE``.

    ``LPE`` must be one of ``'edge'``, ``'node'`` or ``'none'``; any other
    value raises ``KeyError``. ``net_params`` is passed unchanged to the
    chosen factory.
    """
    # Map each LPE mode to the factory that builds the matching model.
    factories = {'edge': EdgeLPE, 'node': NodeLPE, 'none': NoLPE}
    build = factories[LPE]
    return build(net_params)
================================================
FILE: nets/molhiv_graph_regression/SAN.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import dgl
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder
"""
Graph Transformer with edge features
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout
class SAN(nn.Module):
def __init__(self, net_params):
super().__init__()
full_graph = net_params['full_graph']
gamma = net_params['gamma']
GT_layers = net_params['GT_layers']
GT_hidden_dim = net_params['GT_hidden_dim']
GT_out_dim = net_params['GT_out_dim']
GT_n_heads = net_params['GT_n_heads']
self.residual = net_params['residual']
self.readout = net_params['readout']
in_feat_dropout = net_params['in_feat_dropout']
dropout = net_params['dropout']
self.readout = net_params['readout']
self.layer_norm = net_params['layer_norm']
self.batch_norm = net_params['batch_norm']
self.device = net_params['device']
self.in_feat_dropout = nn.Dropout(in_feat_dropout)
self.embedding_h = AtomEncoder(emb_dim = GT_hidden_dim)
self.embedding_e = BondEncoder(emb_dim = GT_hidden_dim)
self.layers = nn.ModuleList([ GraphTransformerLayer(gamma, GT_hidden_dim, GT_hidden_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual) for _ in range(GT_layers-1) ])
self.layers.append(GraphTransformerLayer(gamma, GT_hidden_dim, GT_out_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual))
self.MLP_layer = MLPReadout(GT_out_dim, 1) # out dim for probability
def forward(self, g, h, e):
# input embedding
h = self.embedding_h(h)
h = self.in_feat_dropout(h)
e = self.embedding_e(e)
# Second Transformer
for conv in self.layers:
h, e = conv(g, h, e)
g.ndata['h'] = h
if self.readout == "sum":
hg = dgl.sum_nodes(g, 'h')
elif self.readout == "max":
hg = dgl.max_nodes(g, 'h'
gitextract_q068fqi7/
├── .gitignore
├── LICENSE
├── README.md
├── configs/
│ ├── CLUSTER/
│ │ ├── ablation/
│ │ │ ├── full/
│ │ │ │ ├── 1e-1/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-2/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-3/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-4/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-5/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-6/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-7/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-8/
│ │ │ │ │ └── node
│ │ │ │ └── none
│ │ │ └── sparse/
│ │ │ ├── node
│ │ │ └── none
│ │ └── optimized
│ ├── MOLHIV/
│ │ ├── ablation/
│ │ │ ├── full/
│ │ │ │ ├── 1e-3/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-4/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-5/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-6/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-7/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-8/
│ │ │ │ │ └── node
│ │ │ │ └── none
│ │ │ └── sparse/
│ │ │ ├── node
│ │ │ └── none
│ │ └── optimized
│ ├── MOLPCBA/
│ │ ├── ablation/
│ │ │ ├── full/
│ │ │ │ ├── 1e-3/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-4/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-5/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-6/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-7/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-8/
│ │ │ │ │ └── node
│ │ │ │ └── none
│ │ │ └── sparse/
│ │ │ ├── node
│ │ │ └── none
│ │ └── optimized
│ ├── PATTERN/
│ │ ├── ablation/
│ │ │ ├── full/
│ │ │ │ ├── 1e-1/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-2/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-3/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-4/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-5/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-6/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-7/
│ │ │ │ │ └── node
│ │ │ │ ├── 1e-8/
│ │ │ │ │ └── node
│ │ │ │ └── none
│ │ │ └── sparse/
│ │ │ ├── node
│ │ │ └── none
│ │ └── optimized
│ └── ZINC/
│ ├── ablation/
│ │ ├── full/
│ │ │ ├── 1e-2/
│ │ │ │ └── node
│ │ │ ├── 1e-3/
│ │ │ │ └── node
│ │ │ ├── 1e-4/
│ │ │ │ └── node
│ │ │ ├── 1e-5/
│ │ │ │ └── node
│ │ │ ├── 1e-6/
│ │ │ │ └── node
│ │ │ ├── 1e-7/
│ │ │ │ └── node
│ │ │ ├── 1e-8/
│ │ │ │ └── node
│ │ │ └── none
│ │ └── sparse/
│ │ ├── node
│ │ └── none
│ └── optimized
├── data/
│ ├── SBMs.py
│ ├── data.py
│ ├── molecules.py
│ ├── molhiv.py
│ ├── molpcba.py
│ ├── script_download_SBMs.sh
│ ├── script_download_all_datasets.sh
│ └── script_download_molecules.sh
├── layers/
│ ├── graph_transformer_layer.py
│ └── mlp_readout_layer.py
├── main_SBMs_node_classification.py
├── main_ZINC_graph_regression.py
├── main_molhiv.py
├── main_molpcba.py
├── misc/
│ ├── download_datasets.md
│ └── env_installation.md
├── nets/
│ ├── SBMs_node_classification/
│ │ ├── SAN.py
│ │ ├── SAN_EdgeLPE.py
│ │ ├── SAN_NodeLPE.py
│ │ └── load_net.py
│ ├── ZINC_graph_regression/
│ │ ├── SAN.py
│ │ ├── SAN_EdgeLPE.py
│ │ ├── SAN_NodeLPE.py
│ │ └── load_net.py
│ ├── molhiv_graph_regression/
│ │ ├── SAN.py
│ │ ├── SAN_EdgeLPE.py
│ │ ├── SAN_NodeLPE.py
│ │ └── load_net.py
│ └── molpcba/
│ ├── SAN.py
│ ├── SAN_EdgeLPE.py
│ ├── SAN_NodeLPE.py
│ └── load_net.py
├── requirements.txt
├── scripts/
│ ├── CLUSTER/
│ │ ├── ablation/
│ │ │ ├── full_node_1e-1
│ │ │ ├── full_node_1e-2
│ │ │ ├── full_node_1e-3
│ │ │ ├── full_node_1e-4
│ │ │ ├── full_node_1e-5
│ │ │ ├── full_node_1e-6
│ │ │ ├── full_node_1e-7
│ │ │ ├── full_node_1e-8
│ │ │ ├── full_none
│ │ │ ├── sparse_node
│ │ │ └── sparse_none
│ │ └── optimized/
│ │ └── cluster_optimized
│ ├── MOLHIV/
│ │ ├── ablation/
│ │ │ ├── full_node_1e-3
│ │ │ ├── full_node_1e-4
│ │ │ ├── full_node_1e-5
│ │ │ ├── full_node_1e-6
│ │ │ ├── full_node_1e-7
│ │ │ ├── full_node_1e-8
│ │ │ ├── full_none
│ │ │ ├── sparse_node
│ │ │ └── sparse_none
│ │ └── optimized/
│ │ └── molhiv_optimized
│ ├── MOLPCBA/
│ │ ├── ablation/
│ │ │ ├── full_node_1e-3
│ │ │ ├── full_node_1e-4
│ │ │ ├── full_node_1e-5
│ │ │ ├── full_node_1e-6
│ │ │ ├── full_node_1e-7
│ │ │ ├── full_node_1e-8
│ │ │ ├── full_none
│ │ │ ├── sparse_node
│ │ │ └── sparse_none
│ │ └── optimized/
│ │ └── molpcba_optimized
│ ├── PATTERN/
│ │ ├── ablation/
│ │ │ ├── full_node_1e-1
│ │ │ ├── full_node_1e-2
│ │ │ ├── full_node_1e-3
│ │ │ ├── full_node_1e-4
│ │ │ ├── full_node_1e-5
│ │ │ ├── full_node_1e-6
│ │ │ ├── full_node_1e-7
│ │ │ ├── full_node_1e-8
│ │ │ ├── full_none
│ │ │ ├── sparse_node
│ │ │ └── sparse_none
│ │ └── optimized/
│ │ └── pattern_optimized
│ ├── ZINC/
│ │ ├── ablation/
│ │ │ ├── full_node_1e-2
│ │ │ ├── full_node_1e-3
│ │ │ ├── full_node_1e-4
│ │ │ ├── full_node_1e-5
│ │ │ ├── full_node_1e-6
│ │ │ ├── full_node_1e-7
│ │ │ ├── full_node_1e-8
│ │ │ ├── full_none
│ │ │ ├── sparse_node
│ │ │ └── sparse_none
│ │ └── optimized/
│ │ └── zinc_optimized
│ └── reproduce.md
└── train/
├── MetricWrapper.py
├── metrics.py
├── train_SBMs_node_classification.py
├── train_ZINC_graph_regression.py
├── train_molhiv.py
└── train_molpcba.py
SYMBOL INDEX (171 symbols across 33 files)
FILE: data/SBMs.py
class load_SBMsDataSetDGL (line 18) | class load_SBMsDataSetDGL(torch.utils.data.Dataset):
method __init__ (line 20) | def __init__(self,
method _prepare (line 35) | def _prepare(self):
method __len__ (line 60) | def __len__(self):
method __getitem__ (line 64) | def __getitem__(self, idx):
class SBMsDatasetDGL (line 80) | class SBMsDatasetDGL(torch.utils.data.Dataset):
method __init__ (line 82) | def __init__(self, name):
function laplace_decomp (line 98) | def laplace_decomp(g, max_freqs):
function make_full_graph (line 137) | def make_full_graph(g):
function add_edge_laplace_feats (line 162) | def add_edge_laplace_feats(g):
class SBMsDataset (line 184) | class SBMsDataset(torch.utils.data.Dataset):
method __init__ (line 186) | def __init__(self, name):
method collate (line 205) | def collate(self, samples):
method _laplace_decomp (line 214) | def _laplace_decomp(self, max_freqs):
method _make_full_graph (line 220) | def _make_full_graph(self):
method _add_edge_laplace_feats (line 226) | def _add_edge_laplace_feats(self):
FILE: data/data.py
function LoadData (line 9) | def LoadData(DATASET_NAME):
FILE: data/molecules.py
class MoleculeDGL (line 21) | class MoleculeDGL(torch.utils.data.Dataset):
method __init__ (line 22) | def __init__(self, data_dir, split, num_graphs=None):
method _prepare (line 53) | def _prepare(self):
method __len__ (line 77) | def __len__(self):
method __getitem__ (line 81) | def __getitem__(self, idx):
class MoleculeDatasetDGL (line 97) | class MoleculeDatasetDGL(torch.utils.data.Dataset):
method __init__ (line 98) | def __init__(self, name='Zinc'):
function laplace_decomp (line 120) | def laplace_decomp(g, max_freqs):
function make_full_graph (line 159) | def make_full_graph(g):
function add_edge_laplace_feats (line 184) | def add_edge_laplace_feats(g):
class MoleculeDataset (line 205) | class MoleculeDataset(torch.utils.data.Dataset):
method __init__ (line 207) | def __init__(self, name):
method collate (line 227) | def collate(self, samples):
method _laplace_decomp (line 235) | def _laplace_decomp(self, max_freqs):
method _make_full_graph (line 241) | def _make_full_graph(self):
method _add_edge_laplace_feats (line 247) | def _add_edge_laplace_feats(self):
FILE: data/molhiv.py
function laplace_decomp (line 19) | def laplace_decomp(graph, max_freqs):
function make_full_graph (line 56) | def make_full_graph(graph):
function add_edge_laplace_feats (line 82) | def add_edge_laplace_feats(graph):
class MolHIVDataset (line 104) | class MolHIVDataset(torch.utils.data.Dataset):
method __init__ (line 106) | def __init__(self, name):
method collate (line 129) | def collate(self, samples):
method _laplace_decomp (line 136) | def _laplace_decomp(self, max_freqs):
method _make_full_graph (line 141) | def _make_full_graph(self):
method _add_edge_laplace_feats (line 146) | def _add_edge_laplace_feats(self):
FILE: data/molpcba.py
function laplace_decomp (line 21) | def laplace_decomp(graph, max_freqs):
function make_full_graph (line 62) | def make_full_graph(graph):
function add_edge_laplace_feats (line 88) | def add_edge_laplace_feats(graph):
class MolPCBADataset (line 110) | class MolPCBADataset(torch.utils.data.Dataset):
method __init__ (line 112) | def __init__(self, name):
method collate (line 135) | def collate(self, samples):
method _laplace_decomp (line 142) | def _laplace_decomp(self, max_freqs):
method _make_full_graph (line 147) | def _make_full_graph(self):
method _add_edge_laplace_feats (line 152) | def _add_edge_laplace_feats(self):
FILE: layers/graph_transformer_layer.py
function src_dot_dst (line 17) | def src_dot_dst(src_field, dst_field, out_field):
function scaling (line 23) | def scaling(field, scale_constant):
function imp_exp_attn (line 29) | def imp_exp_attn(implicit_attn, explicit_edge):
function exp_real (line 39) | def exp_real(field, L):
function exp_fake (line 46) | def exp_fake(field, L):
function exp (line 52) | def exp(field):
class MultiHeadAttentionLayer (line 63) | class MultiHeadAttentionLayer(nn.Module):
method __init__ (line 64) | def __init__(self, gamma, in_dim, out_dim, num_heads, full_graph, use_...
method propagate_attention (line 97) | def propagate_attention(self, g):
method forward (line 137) | def forward(self, g, h, e):
class GraphTransformerLayer (line 172) | class GraphTransformerLayer(nn.Module):
method __init__ (line 176) | def __init__(self, gamma, in_dim, out_dim, num_heads, full_graph, drop...
method forward (line 208) | def forward(self, g, h, e):
method __repr__ (line 249) | def __repr__(self):
FILE: layers/mlp_readout_layer.py
class MLPReadout (line 9) | class MLPReadout(nn.Module):
method __init__ (line 11) | def __init__(self, input_dim, output_dim, L=2): # L=nb_hidden_layers
method forward (line 18) | def forward(self, x):
FILE: main_SBMs_node_classification.py
class DotDict (line 25) | class DotDict(dict):
method __init__ (line 26) | def __init__(self, **kwds):
function gpu_setup (line 42) | def gpu_setup(use_gpu, gpu_id):
function view_model_param (line 59) | def view_model_param(LPE, net_params):
function train_val_pipeline (line 79) | def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
function main (line 243) | def main():
FILE: main_ZINC_graph_regression.py
function gpu_setup (line 38) | def gpu_setup(use_gpu, gpu_id):
function view_model_param (line 55) | def view_model_param(LPE, net_params):
function train_val_pipeline (line 75) | def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
function main (line 240) | def main():
FILE: main_molhiv.py
function gpu_setup (line 37) | def gpu_setup(use_gpu, gpu_id):
function view_model_param (line 54) | def view_model_param(LPE, net_params):
function train_val_pipeline (line 74) | def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
function main (line 247) | def main():
FILE: main_molpcba.py
function gpu_setup (line 37) | def gpu_setup(use_gpu, gpu_id):
function view_model_param (line 54) | def view_model_param(LPE, net_params):
function train_val_pipeline (line 74) | def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
function main (line 247) | def main():
FILE: nets/SBMs_node_classification/SAN.py
class SAN (line 15) | class SAN(nn.Module):
method __init__ (line 17) | def __init__(self, net_params):
method forward (line 55) | def forward(self, g, h, e):
method loss (line 73) | def loss(self, pred, label):
FILE: nets/SBMs_node_classification/SAN_EdgeLPE.py
class SAN_EdgeLPE (line 15) | class SAN_EdgeLPE(nn.Module):
method __init__ (line 17) | def __init__(self, net_params):
method forward (line 60) | def forward(self, g, h, e, diff, product, EigVals):
method loss (line 97) | def loss(self, pred, label):
FILE: nets/SBMs_node_classification/SAN_NodeLPE.py
class SAN_NodeLPE (line 15) | class SAN_NodeLPE(nn.Module):
method __init__ (line 17) | def __init__(self, net_params):
method forward (line 61) | def forward(self, g, h, e, EigVecs, EigVals):
method loss (line 99) | def loss(self, pred, label):
FILE: nets/SBMs_node_classification/load_net.py
function NodeLPE (line 11) | def NodeLPE(net_params):
function EdgeLPE (line 14) | def EdgeLPE(net_params):
function NoLPE (line 17) | def NoLPE(net_params):
function gnn_model (line 20) | def gnn_model(LPE, net_params):
FILE: nets/ZINC_graph_regression/SAN.py
class SAN (line 15) | class SAN(nn.Module):
method __init__ (line 16) | def __init__(self, net_params):
method forward (line 51) | def forward(self, g, h, e):
method loss (line 74) | def loss(self, scores, targets):
FILE: nets/ZINC_graph_regression/SAN_EdgeLPE.py
class SAN_EdgeLPE (line 15) | class SAN_EdgeLPE(nn.Module):
method __init__ (line 16) | def __init__(self, net_params):
method forward (line 59) | def forward(self, g, h, e, diff, product, EigVals):
method loss (line 102) | def loss(self, scores, targets):
FILE: nets/ZINC_graph_regression/SAN_NodeLPE.py
class SAN_NodeLPE (line 15) | class SAN_NodeLPE(nn.Module):
method __init__ (line 16) | def __init__(self, net_params):
method forward (line 59) | def forward(self, g, h, e, EigVecs, EigVals):
method loss (line 104) | def loss(self, scores, targets):
FILE: nets/ZINC_graph_regression/load_net.py
function NodeLPE (line 7) | def NodeLPE(net_params):
function EdgeLPE (line 10) | def EdgeLPE(net_params):
function NoLPE (line 13) | def NoLPE(net_params):
function gnn_model (line 16) | def gnn_model(LPE, net_params):
FILE: nets/molhiv_graph_regression/SAN.py
class SAN (line 17) | class SAN(nn.Module):
method __init__ (line 18) | def __init__(self, net_params):
method forward (line 51) | def forward(self, g, h, e):
method loss (line 76) | def loss(self, scores, targets):
FILE: nets/molhiv_graph_regression/SAN_EdgeLPE.py
class SAN_EdgeLPE (line 17) | class SAN_EdgeLPE(nn.Module):
method __init__ (line 18) | def __init__(self, net_params):
method forward (line 58) | def forward(self, g, h, e, diff, product, EigVals):
method loss (line 104) | def loss(self, scores, targets):
FILE: nets/molhiv_graph_regression/SAN_NodeLPE.py
class SAN_NodeLPE (line 17) | class SAN_NodeLPE(nn.Module):
method __init__ (line 18) | def __init__(self, net_params):
method forward (line 59) | def forward(self, g, h, e, EigVecs, EigVals):
method loss (line 106) | def loss(self, scores, targets):
FILE: nets/molhiv_graph_regression/load_net.py
function NodeLPE (line 6) | def NodeLPE(net_params):
function EdgeLPE (line 9) | def EdgeLPE(net_params):
function NoLPE (line 12) | def NoLPE(net_params):
function gnn_model (line 15) | def gnn_model(LPE, net_params):
FILE: nets/molpcba/SAN.py
class SAN (line 17) | class SAN(nn.Module):
method __init__ (line 18) | def __init__(self, net_params):
method forward (line 51) | def forward(self, g, h, e):
method loss (line 76) | def loss(self, scores, targets):
FILE: nets/molpcba/SAN_EdgeLPE.py
class SAN_EdgeLPE (line 17) | class SAN_EdgeLPE(nn.Module):
method __init__ (line 18) | def __init__(self, net_params):
method forward (line 58) | def forward(self, g, h, e, diff, product, EigVals):
method loss (line 104) | def loss(self, scores, targets):
FILE: nets/molpcba/SAN_NodeLPE.py
class SAN_NodeLPE (line 17) | class SAN_NodeLPE(nn.Module):
method __init__ (line 18) | def __init__(self, net_params):
method forward (line 69) | def forward(self, g, h, e, EigVecs, EigVals):
method loss (line 126) | def loss(self, scores, targets):
FILE: nets/molpcba/load_net.py
function NodeLPE (line 6) | def NodeLPE(net_params):
function EdgeLPE (line 9) | def EdgeLPE(net_params):
function NoLPE (line 12) | def NoLPE(net_params):
function gnn_model (line 15) | def gnn_model(LPE, net_params):
FILE: train/MetricWrapper.py
class MetricWrapper (line 6) | class MetricWrapper:
method __init__ (line 12) | def __init__(
method compute (line 45) | def compute(self, preds: torch.Tensor, target: torch.Tensor) -> torch....
method __call__ (line 93) | def __call__(self, preds: torch.Tensor, target: torch.Tensor) -> torch...
method __repr__ (line 99) | def __repr__(self):
method nan_mean (line 107) | def nan_mean(self, input: Tensor, **kwargs) -> Tensor:
FILE: train/metrics.py
function MAE (line 10) | def MAE(scores, targets):
function accuracy_TU (line 16) | def accuracy_TU(scores, targets):
function accuracy_MNIST_CIFAR (line 22) | def accuracy_MNIST_CIFAR(scores, targets):
function accuracy_CITATION_GRAPH (line 27) | def accuracy_CITATION_GRAPH(scores, targets):
function accuracy_SBM (line 34) | def accuracy_SBM(scores, targets):
function binary_f1_score (line 54) | def binary_f1_score(scores, targets):
function accuracy_VOC (line 64) | def accuracy_VOC(scores, targets):
FILE: train/train_SBMs_node_classification.py
function train_epoch (line 12) | def train_epoch(model, optimizer, device, data_loader, epoch, LPE):
function evaluate_network (line 55) | def evaluate_network(model, device, data_loader, epoch, LPE):
FILE: train/train_ZINC_graph_regression.py
function train_epoch (line 11) | def train_epoch(model, optimizer, device, data_loader, epoch, LPE):
function evaluate_network (line 53) | def evaluate_network(model, device, data_loader, epoch, LPE):
FILE: train/train_molhiv.py
function train_epoch (line 11) | def train_epoch(model, optimizer, device, data_loader, epoch, LPE):
function evaluate_network (line 68) | def evaluate_network(model, device, data_loader, epoch, LPE):
FILE: train/train_molpcba.py
function train_epoch (line 14) | def train_epoch(model, optimizer, device, data_loader, epoch, LPE, batch...
function evaluate_network (line 76) | def evaluate_network(model, device, data_loader, epoch, LPE):
Condensed preview — 153 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (282K chars).
[
{
"path": ".gitignore",
"chars": 1870,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": "LICENSE",
"chars": 1094,
"preview": "MIT License\n\nCopyright (c) 2020 Vijay Prakash Dwivedi, Xavier Bresson\n\nPermission is hereby granted, free of charge, to "
},
{
"path": "README.md",
"chars": 984,
"preview": "# SAN\n\nImplementation of Spectral Attention Networks, a powerful GNN that leverages key principles from spectral graph t"
},
{
"path": "configs/CLUSTER/ablation/full/1e-1/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/ablation/full/1e-2/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/ablation/full/1e-3/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/ablation/full/1e-4/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/ablation/full/1e-5/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/ablation/full/1e-6/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/ablation/full/1e-7/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/ablation/full/1e-8/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/ablation/full/none",
"chars": 960,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/ablation/sparse/node",
"chars": 963,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/ablation/sparse/none",
"chars": 963,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/CLUSTER/optimized",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_CLU"
},
{
"path": "configs/MOLHIV/ablation/full/1e-3/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLHIV/ablation/full/1e-4/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLHIV/ablation/full/1e-5/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLHIV/ablation/full/1e-6/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLHIV/ablation/full/1e-7/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLHIV/ablation/full/1e-8/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLHIV/ablation/full/none",
"chars": 925,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLHIV/ablation/sparse/node",
"chars": 927,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLHIV/ablation/sparse/none",
"chars": 927,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLHIV/optimized",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLPCBA/ablation/full/1e-3/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLPCBA/ablation/full/1e-4/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLPCBA/ablation/full/1e-5/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLPCBA/ablation/full/1e-6/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLPCBA/ablation/full/1e-7/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLPCBA/ablation/full/1e-8/node",
"chars": 931,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLPCBA/ablation/full/none",
"chars": 925,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLPCBA/ablation/sparse/node",
"chars": 927,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLPCBA/ablation/sparse/none",
"chars": 927,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-HIV\",\n\n"
},
{
"path": "configs/MOLPCBA/optimized",
"chars": 1004,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n\n \"model\": \"GraphTransformer\",\n \"dataset\": \"MOL-PCBA\",\n"
},
{
"path": "configs/PATTERN/ablation/full/1e-1/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/ablation/full/1e-2/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/ablation/full/1e-3/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/ablation/full/1e-4/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/ablation/full/1e-5/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/ablation/full/1e-6/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/ablation/full/1e-7/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/ablation/full/1e-8/node",
"chars": 965,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/ablation/full/none",
"chars": 960,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/ablation/sparse/node",
"chars": 963,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/ablation/sparse/none",
"chars": 963,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/PATTERN/optimized",
"chars": 966,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"SBM_PAT"
},
{
"path": "configs/ZINC/ablation/full/1e-2/node",
"chars": 943,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "configs/ZINC/ablation/full/1e-3/node",
"chars": 943,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "configs/ZINC/ablation/full/1e-4/node",
"chars": 943,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "configs/ZINC/ablation/full/1e-5/node",
"chars": 943,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "configs/ZINC/ablation/full/1e-6/node",
"chars": 943,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "configs/ZINC/ablation/full/1e-7/node",
"chars": 943,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "configs/ZINC/ablation/full/1e-8/node",
"chars": 943,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "configs/ZINC/ablation/full/none",
"chars": 938,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "configs/ZINC/ablation/sparse/node",
"chars": 941,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "configs/ZINC/ablation/sparse/none",
"chars": 941,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "configs/ZINC/optimized",
"chars": 942,
"preview": "{\n \"gpu\": {\n \"use\": true,\n \"id\": 0\n },\n \n \"model\": \"GraphTransformer\",\n \"dataset\": \"ZINC\",\n"
},
{
"path": "data/SBMs.py",
"chars": 7394,
"preview": "\nimport time\nimport os\nimport pickle\nimport numpy as np\n\nimport dgl\nimport torch\nimport torch.nn.functional as F\n\nfrom s"
},
{
"path": "data/data.py",
"chars": 821,
"preview": "\"\"\"\n File to load dataset based on user control from main file\n\"\"\"\nfrom data.molecules import MoleculeDataset\nfrom da"
},
{
"path": "data/molecules.py",
"chars": 8919,
"preview": "import torch\nimport pickle\nimport torch.utils.data\nimport time\nimport os\nimport numpy as np\n\nimport csv\n\nimport dgl\nimpo"
},
{
"path": "data/molhiv.py",
"chars": 5074,
"preview": "import torch\nimport pickle\nimport torch.utils.data\nimport time\nimport os\nimport numpy as np\n\nimport csv\n\nimport dgl\nimpo"
},
{
"path": "data/molpcba.py",
"chars": 5267,
"preview": "import torch\nimport pickle\nimport torch.utils.data\nimport time\nimport os\nimport numpy as np\n\nimport csv\n\nimport dgl\nimpo"
},
{
"path": "data/script_download_SBMs.sh",
"chars": 535,
"preview": "\n\n# Command to download dataset:\n# bash script_download_SBMs.sh\n\n\nmkdir SBMs\ncd SBMs\n\n\nFILE=SBM_CLUSTER.pkl\nif test -f"
},
{
"path": "data/script_download_all_datasets.sh",
"chars": 882,
"preview": "\n\n# Command to download dataset:\n# bash script_download_all_datasets.sh\n\n\n\n############\n# ZINC\n############\n\nmkdir mol"
},
{
"path": "data/script_download_molecules.sh",
"chars": 307,
"preview": "\n\n# Command to download dataset:\n# bash script_download_molecules.sh\n\n\nmkdir molecules/\ncd molecules\n\n\nFILE=ZINC.pkl\ni"
},
{
"path": "layers/graph_transformer_layer.py",
"chars": 8113,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport dgl\nimport dgl.function as fn\nimport numpy as"
},
{
"path": "layers/mlp_readout_layer.py",
"chars": 713,
"preview": "import torch.nn as nn\nimport torch.nn.functional as F\n\n\"\"\"\n MLP Layer used after graph vector representation\n\"\"\"\n\n\ncl"
},
{
"path": "main_SBMs_node_classification.py",
"chars": 16710,
"preview": "\"\"\"\n IMPORTING LIBS\n\"\"\"\nimport dgl\n\nimport numpy as np\nimport os\nimport socket\nimport time\nimport random\nimport glob\n"
},
{
"path": "main_ZINC_graph_regression.py",
"chars": 16686,
"preview": "\"\"\"\n IMPORTING LIBS\n\"\"\"\nimport dgl\n\nimport numpy as np\nimport os\nimport socket\nimport time\nimport random\nimport glob\n"
},
{
"path": "main_molhiv.py",
"chars": 16100,
"preview": "\"\"\"\n IMPORTING LIBS\n\"\"\"\nimport dgl\n\nimport numpy as np\nimport os\nimport socket\nimport time\nimport random\nimport glob\n"
},
{
"path": "main_molpcba.py",
"chars": 16437,
"preview": "\"\"\"\n IMPORTING LIBS\n\"\"\"\nimport dgl\n\nimport numpy as np\nimport os\nimport socket\nimport time\nimport random\nimport glob\n"
},
{
"path": "misc/download_datasets.md",
"chars": 1369,
"preview": "# Download datasets\n\nAll the datasets work with DGL 0.5.x or later. Please update the environment using the yml files in"
},
{
"path": "misc/env_installation.md",
"chars": 1634,
"preview": "# Benchmark installation\n\n\n\n<br>\n\n## 1. Setup Conda\n\n```\n# Conda installation\n\n# For Linux\ncurl -o ~/miniconda.sh -O htt"
},
{
"path": "nets/SBMs_node_classification/SAN.py",
"chars": 2828,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport dgl\nimport numpy as np\n\n\"\"\"\n Graph Transfo"
},
{
"path": "nets/SBMs_node_classification/SAN_EdgeLPE.py",
"chars": 4207,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport dgl\nimport numpy as np\n\n\"\"\"\n Graph Transfo"
},
{
"path": "nets/SBMs_node_classification/SAN_NodeLPE.py",
"chars": 4221,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport dgl\nimport numpy as np\n\n\"\"\"\n Graph Transfo"
},
{
"path": "nets/SBMs_node_classification/load_net.py",
"chars": 601,
"preview": "\"\"\"\n Utility file to select GraphNN model as\n selected by the user\n\"\"\"\n\nfrom nets.SBMs_node_classification.SAN_Nod"
},
{
"path": "nets/ZINC_graph_regression/SAN.py",
"chars": 2637,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport dgl\nimport numpy as np\n\n\"\"\"\n Graph Transfo"
},
{
"path": "nets/ZINC_graph_regression/SAN_EdgeLPE.py",
"chars": 4021,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport dgl\nimport numpy as np\n\n\"\"\"\n Graph Transfo"
},
{
"path": "nets/ZINC_graph_regression/SAN_NodeLPE.py",
"chars": 4042,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport dgl\nimport numpy as np\n\n\"\"\"\n Graph Transfo"
},
{
"path": "nets/ZINC_graph_regression/load_net.py",
"chars": 515,
"preview": "\nfrom nets.ZINC_graph_regression.SAN_NodeLPE import SAN_NodeLPE\nfrom nets.ZINC_graph_regression.SAN_EdgeLPE import SAN_E"
},
{
"path": "nets/molhiv_graph_regression/SAN.py",
"chars": 2665,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n\nimport dgl\n\nfrom ogb.graphproppre"
},
{
"path": "nets/molhiv_graph_regression/SAN_EdgeLPE.py",
"chars": 4049,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n\nimport dgl\n\nfrom ogb.graphproppre"
},
{
"path": "nets/molhiv_graph_regression/SAN_NodeLPE.py",
"chars": 4111,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n\nimport dgl\n\nfrom ogb.graphproppre"
},
{
"path": "nets/molhiv_graph_regression/load_net.py",
"chars": 521,
"preview": "\nfrom nets.molhiv_graph_regression.SAN_NodeLPE import SAN_NodeLPE\nfrom nets.molhiv_graph_regression.SAN_EdgeLPE import S"
},
{
"path": "nets/molpcba/SAN.py",
"chars": 2667,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n\nimport dgl\n\nfrom ogb.graphproppre"
},
{
"path": "nets/molpcba/SAN_EdgeLPE.py",
"chars": 4051,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n\nimport dgl\n\nfrom ogb.graphproppre"
},
{
"path": "nets/molpcba/SAN_NodeLPE.py",
"chars": 4635,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\n\nimport dgl\n\nfrom ogb.graphproppre"
},
{
"path": "nets/molpcba/load_net.py",
"chars": 473,
"preview": "\nfrom nets.molpcba.SAN_NodeLPE import SAN_NodeLPE\nfrom nets.molpcba.SAN_EdgeLPE import SAN_EdgeLPE\nfrom nets.molpcba.SAN"
},
{
"path": "requirements.txt",
"chars": 2212,
"preview": "absl-py==0.11.0\nargon2-cffi==20.1.0\nase==3.20.1\nastunparse==1.6.3\nasync-generator==1.10\nattrs==20.3.0\nbackcall==0.2.0\nbl"
},
{
"path": "scripts/CLUSTER/ablation/full_node_1e-1",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/CLUSTER/ablation/full_node_1e-2",
"chars": 522,
"preview": "#!/bin/bash\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUSTE"
},
{
"path": "scripts/CLUSTER/ablation/full_node_1e-3",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/CLUSTER/ablation/full_node_1e-4",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/CLUSTER/ablation/full_node_1e-5",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/CLUSTER/ablation/full_node_1e-6",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/CLUSTER/ablation/full_node_1e-7",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/CLUSTER/ablation/full_node_1e-8",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/CLUSTER/ablation/full_none",
"chars": 500,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/CLUSTER/ablation/sparse_node",
"chars": 508,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/CLUSTER/ablation/sparse_none",
"chars": 508,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/CLUSTER/optimized/cluster_optimized",
"chars": 467,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed 41 --config 'configs/CLUST"
},
{
"path": "scripts/MOLHIV/ablation/full_node_1e-3",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-3/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLHIV/ablation/full_node_1e-4",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-4/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLHIV/ablation/full_node_1e-5",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-5/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLHIV/ablation/full_node_1e-6",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-6/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLHIV/ablation/full_node_1e-7",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-7/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLHIV/ablation/full_node_1e-8",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-8/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLHIV/ablation/full_none",
"chars": 296,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/none'\npython main_molhiv.py --con"
},
{
"path": "scripts/MOLHIV/ablation/sparse_node",
"chars": 304,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/sparse/node'\npython main_molhiv.py --c"
},
{
"path": "scripts/MOLHIV/ablation/sparse_none",
"chars": 304,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/sparse/none'\npython main_molhiv.py --c"
},
{
"path": "scripts/MOLHIV/optimized/molhiv_optimized",
"chars": 263,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/optimized'\npython main_molhiv.py --config 'conf"
},
{
"path": "scripts/MOLPCBA/ablation/full_node_1e-3",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-3/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLPCBA/ablation/full_node_1e-4",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-4/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLPCBA/ablation/full_node_1e-5",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-5/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLPCBA/ablation/full_node_1e-6",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-6/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLPCBA/ablation/full_node_1e-7",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-7/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLPCBA/ablation/full_node_1e-8",
"chars": 319,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-8/node'\npython main_molhiv.py "
},
{
"path": "scripts/MOLPCBA/ablation/full_none",
"chars": 296,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/full/none'\npython main_molhiv.py --con"
},
{
"path": "scripts/MOLPCBA/ablation/sparse_node",
"chars": 304,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/sparse/node'\npython main_molhiv.py --c"
},
{
"path": "scripts/MOLPCBA/ablation/sparse_none",
"chars": 304,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molhiv.py --config 'configs/MOLHIV/ablation/sparse/none'\npython main_molhiv.py --c"
},
{
"path": "scripts/MOLPCBA/optimized/molpcba_optimized",
"chars": 271,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_molpcba.py --config 'configs/MOLPCBA/optimized'\npython main_molpcba.py --config 'c"
},
{
"path": "scripts/PATTERN/ablation/full_node_1e-1",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/PATTERN/ablation/full_node_1e-2",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/PATTERN/ablation/full_node_1e-3",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/PATTERN/ablation/full_node_1e-4",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/PATTERN/ablation/full_node_1e-5",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/PATTERN/ablation/full_node_1e-6",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/PATTERN/ablation/full_node_1e-7",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/PATTERN/ablation/full_node_1e-8",
"chars": 523,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/PATTERN/ablation/full_none",
"chars": 499,
"preview": "#!/bin/bash\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTER"
},
{
"path": "scripts/PATTERN/ablation/sparse_node",
"chars": 508,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/PATTERN/ablation/sparse_none",
"chars": 508,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/PATTERN/optimized/pattern_optimized",
"chars": 467,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_SBMs_node_classification.py --dataset SBM_PATTERN --seed 41 --config 'configs/PATTE"
},
{
"path": "scripts/ZINC/ablation/full_node_1e-2",
"chars": 411,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/ablation/full/1e-2/node'\n"
},
{
"path": "scripts/ZINC/ablation/full_node_1e-3",
"chars": 411,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/ablation/full/1e-3/node'\n"
},
{
"path": "scripts/ZINC/ablation/full_node_1e-4",
"chars": 411,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/ablation/full/1e-4/node'\n"
},
{
"path": "scripts/ZINC/ablation/full_node_1e-5",
"chars": 410,
"preview": "#!/bin/bash\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/ablation/full/1e-5/node'\np"
},
{
"path": "scripts/ZINC/ablation/full_node_1e-6",
"chars": 411,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/ablation/full/1e-6/node'\n"
},
{
"path": "scripts/ZINC/ablation/full_node_1e-7",
"chars": 411,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/ablation/full/1e-7/node'\n"
},
{
"path": "scripts/ZINC/ablation/full_node_1e-8",
"chars": 411,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/ablation/full/1e-8/node'\n"
},
{
"path": "scripts/ZINC/ablation/full_none",
"chars": 388,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/ablation/full/none'\npytho"
},
{
"path": "scripts/ZINC/ablation/sparse_node",
"chars": 396,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/ablation/sparse/node'\npyt"
},
{
"path": "scripts/ZINC/ablation/sparse_none",
"chars": 396,
"preview": "#!/bin/bash\n\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/ablation/sparse/none'\npyt"
},
{
"path": "scripts/ZINC/optimized/zinc_optimized",
"chars": 354,
"preview": "#!/bin/bash\ncd ~/SAN/\n\npython main_ZINC_graph_regression.py --seed 41 --config 'configs/ZINC/optimized'\npython main_ZI"
},
{
"path": "scripts/reproduce.md",
"chars": 2097,
"preview": "# Reproducibility\n\n\n<br>\n\n\nAll outputs will be sent to the ```/out``` directory from the root of the project in the fold"
},
{
"path": "train/MetricWrapper.py",
"chars": 3842,
"preview": "from typing import Union, Callable, Optional, Dict, Any\nfrom copy import deepcopy\nimport torch\nfrom torch import Tensor\n"
},
{
"path": "train/metrics.py",
"chars": 1988,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom sklearn.metrics import confusion_matrix\nfrom sk"
},
{
"path": "train/train_SBMs_node_classification.py",
"chars": 3463,
"preview": "\"\"\"\n Utility function for training one epoch \n and evaluating one epoch\n\"\"\"\nimport torch\nimport torch.nn as nn\nimp"
},
{
"path": "train/train_ZINC_graph_regression.py",
"chars": 3378,
"preview": "\"\"\"\n Utility function for training one epoch \n and evaluating one epoch\n\"\"\"\nimport torch\nimport torch.nn as nn\nimp"
},
{
"path": "train/train_molhiv.py",
"chars": 4135,
"preview": "\"\"\"\n Utility functions for training one epoch \n and evaluating one epoch\n\"\"\"\nimport torch\nimport torch.nn as nn\nim"
},
{
"path": "train/train_molpcba.py",
"chars": 4223,
"preview": "\"\"\"\n Utility functions for training one epoch\n and evaluating one epoch\n\"\"\"\nimport torch\nfrom torch._C import dtyp"
}
]
About this extraction
This page contains the full source code of the DevinKreuzer/SAN GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 153 files (251.5 KB), approximately 75.9k tokens, and a symbol index with 171 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.