Repository: DevinKreuzer/SAN
Branch: main
Commit: 6e38329957af
Files: 153
Total size: 251.5 KB

Directory structure:
gitextract_q068fqi7/

├── .gitignore
├── LICENSE
├── README.md
├── configs/
│   ├── CLUSTER/
│   │   ├── ablation/
│   │   │   ├── full/
│   │   │   │   ├── 1e-1/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-2/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-3/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-4/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-5/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-6/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-7/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-8/
│   │   │   │   │   └── node
│   │   │   │   └── none
│   │   │   └── sparse/
│   │   │       ├── node
│   │   │       └── none
│   │   └── optimized
│   ├── MOLHIV/
│   │   ├── ablation/
│   │   │   ├── full/
│   │   │   │   ├── 1e-3/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-4/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-5/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-6/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-7/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-8/
│   │   │   │   │   └── node
│   │   │   │   └── none
│   │   │   └── sparse/
│   │   │       ├── node
│   │   │       └── none
│   │   └── optimized
│   ├── MOLPCBA/
│   │   ├── ablation/
│   │   │   ├── full/
│   │   │   │   ├── 1e-3/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-4/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-5/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-6/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-7/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-8/
│   │   │   │   │   └── node
│   │   │   │   └── none
│   │   │   └── sparse/
│   │   │       ├── node
│   │   │       └── none
│   │   └── optimized
│   ├── PATTERN/
│   │   ├── ablation/
│   │   │   ├── full/
│   │   │   │   ├── 1e-1/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-2/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-3/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-4/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-5/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-6/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-7/
│   │   │   │   │   └── node
│   │   │   │   ├── 1e-8/
│   │   │   │   │   └── node
│   │   │   │   └── none
│   │   │   └── sparse/
│   │   │       ├── node
│   │   │       └── none
│   │   └── optimized
│   └── ZINC/
│       ├── ablation/
│       │   ├── full/
│       │   │   ├── 1e-2/
│       │   │   │   └── node
│       │   │   ├── 1e-3/
│       │   │   │   └── node
│       │   │   ├── 1e-4/
│       │   │   │   └── node
│       │   │   ├── 1e-5/
│       │   │   │   └── node
│       │   │   ├── 1e-6/
│       │   │   │   └── node
│       │   │   ├── 1e-7/
│       │   │   │   └── node
│       │   │   ├── 1e-8/
│       │   │   │   └── node
│       │   │   └── none
│       │   └── sparse/
│       │       ├── node
│       │       └── none
│       └── optimized
├── data/
│   ├── SBMs.py
│   ├── data.py
│   ├── molecules.py
│   ├── molhiv.py
│   ├── molpcba.py
│   ├── script_download_SBMs.sh
│   ├── script_download_all_datasets.sh
│   └── script_download_molecules.sh
├── layers/
│   ├── graph_transformer_layer.py
│   └── mlp_readout_layer.py
├── main_SBMs_node_classification.py
├── main_ZINC_graph_regression.py
├── main_molhiv.py
├── main_molpcba.py
├── misc/
│   ├── download_datasets.md
│   └── env_installation.md
├── nets/
│   ├── SBMs_node_classification/
│   │   ├── SAN.py
│   │   ├── SAN_EdgeLPE.py
│   │   ├── SAN_NodeLPE.py
│   │   └── load_net.py
│   ├── ZINC_graph_regression/
│   │   ├── SAN.py
│   │   ├── SAN_EdgeLPE.py
│   │   ├── SAN_NodeLPE.py
│   │   └── load_net.py
│   ├── molhiv_graph_regression/
│   │   ├── SAN.py
│   │   ├── SAN_EdgeLPE.py
│   │   ├── SAN_NodeLPE.py
│   │   └── load_net.py
│   └── molpcba/
│       ├── SAN.py
│       ├── SAN_EdgeLPE.py
│       ├── SAN_NodeLPE.py
│       └── load_net.py
├── requirements.txt
├── scripts/
│   ├── CLUSTER/
│   │   ├── ablation/
│   │   │   ├── full_node_1e-1
│   │   │   ├── full_node_1e-2
│   │   │   ├── full_node_1e-3
│   │   │   ├── full_node_1e-4
│   │   │   ├── full_node_1e-5
│   │   │   ├── full_node_1e-6
│   │   │   ├── full_node_1e-7
│   │   │   ├── full_node_1e-8
│   │   │   ├── full_none
│   │   │   ├── sparse_node
│   │   │   └── sparse_none
│   │   └── optimized/
│   │       └── cluster_optimized
│   ├── MOLHIV/
│   │   ├── ablation/
│   │   │   ├── full_node_1e-3
│   │   │   ├── full_node_1e-4
│   │   │   ├── full_node_1e-5
│   │   │   ├── full_node_1e-6
│   │   │   ├── full_node_1e-7
│   │   │   ├── full_node_1e-8
│   │   │   ├── full_none
│   │   │   ├── sparse_node
│   │   │   └── sparse_none
│   │   └── optimized/
│   │       └── molhiv_optimized
│   ├── MOLPCBA/
│   │   ├── ablation/
│   │   │   ├── full_node_1e-3
│   │   │   ├── full_node_1e-4
│   │   │   ├── full_node_1e-5
│   │   │   ├── full_node_1e-6
│   │   │   ├── full_node_1e-7
│   │   │   ├── full_node_1e-8
│   │   │   ├── full_none
│   │   │   ├── sparse_node
│   │   │   └── sparse_none
│   │   └── optimized/
│   │       └── molpcba_optimized
│   ├── PATTERN/
│   │   ├── ablation/
│   │   │   ├── full_node_1e-1
│   │   │   ├── full_node_1e-2
│   │   │   ├── full_node_1e-3
│   │   │   ├── full_node_1e-4
│   │   │   ├── full_node_1e-5
│   │   │   ├── full_node_1e-6
│   │   │   ├── full_node_1e-7
│   │   │   ├── full_node_1e-8
│   │   │   ├── full_none
│   │   │   ├── sparse_node
│   │   │   └── sparse_none
│   │   └── optimized/
│   │       └── pattern_optimized
│   ├── ZINC/
│   │   ├── ablation/
│   │   │   ├── full_node_1e-2
│   │   │   ├── full_node_1e-3
│   │   │   ├── full_node_1e-4
│   │   │   ├── full_node_1e-5
│   │   │   ├── full_node_1e-6
│   │   │   ├── full_node_1e-7
│   │   │   ├── full_node_1e-8
│   │   │   ├── full_none
│   │   │   ├── sparse_node
│   │   │   └── sparse_none
│   │   └── optimized/
│   │       └── zinc_optimized
│   └── reproduce.md
└── train/
    ├── MetricWrapper.py
    ├── metrics.py
    ├── train_SBMs_node_classification.py
    ├── train_ZINC_graph_regression.py
    ├── train_molhiv.py
    └── train_molpcba.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# Custom GitIgnore
*.code-workspace
*.out
logs/
out/
dataset/
.vscode/

================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2020 Vijay Prakash Dwivedi, Xavier Bresson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# SAN

Implementation of Spectral Attention Networks, a powerful GNN that leverages key principles from spectral graph theory to enable full graph attention.

![full_method](https://user-images.githubusercontent.com/47570400/119883871-046aa280-befe-11eb-9063-108f4fe1a123.png)

# Overview

* ```nets``` contains the node-LPE, edge-LPE, and no-LPE architectures, implemented in PyTorch.
* ```layers``` contains the multi-head attention layers used by the Main Graph Transformer, implemented with DGL.
* ```train``` contains methods to train the models.
* ```data``` contains dataset classes and various methods used in precomputation.
* ```configs``` contains the various parameters used in the ablation and SOTA comparison studies.
* ```misc``` contains guides from https://github.com/graphdeeplearning/graphtransformer for downloading datasets and setting up environments.
* ```scripts``` contains scripts to reproduce ablation and SOTA comparison results. See ```scripts/reproduce.md``` for details.


================================================
FILE: configs/CLUSTER/ablation/full/1e-1/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/full/1e-1/node",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-1,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 48,
        "GT_out_dim": 48,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/ablation/full/1e-2/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/full/1e-2/node",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-2,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 48,
        "GT_out_dim": 48,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/ablation/full/1e-3/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/full/1e-3/node",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-3,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 48,
        "GT_out_dim": 48,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/ablation/full/1e-4/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/full/1e-4/node",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-4,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 48,
        "GT_out_dim": 48,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/ablation/full/1e-5/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/full/1e-5/node",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-5,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 48,
        "GT_out_dim": 48,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/ablation/full/1e-6/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/full/1e-6/node",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-6,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 48,
        "GT_out_dim": 48,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/ablation/full/1e-7/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/full/1e-7/node",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-7,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 48,
        "GT_out_dim": 48,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/ablation/full/1e-8/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/full/1e-8/node",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-8,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 48,
        "GT_out_dim": 48,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/ablation/full/none
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/full/none",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-1,
        "m": 10,
        
        "LPE": "none",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 48,
        "GT_out_dim": 48,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/ablation/sparse/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/sparse/node",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": false,
        "gamma": 1e-1,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 56,
        "GT_out_dim": 56,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/ablation/sparse/none
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/sparse/none",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": false,
        "gamma": 1e-1,
        "m": 10,
        
        "LPE": "none",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 56,
        "GT_out_dim": 56,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/CLUSTER/optimized
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_CLUSTER",
    
    "out_dir": "out/SBM_CLUSTER/full/1e-1/node",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-5,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-1,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 1,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 16,
        "GT_hidden_dim": 48,
        "GT_out_dim": 48,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLHIV/ablation/full/1e-3/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-HIV",

    "out_dir": "out/MOLHIV/full/1e-3/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-3,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLHIV/ablation/full/1e-4/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-HIV",

    "out_dir": "out/MOLHIV/full/1e-4/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-4,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLHIV/ablation/full/1e-5/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-HIV",

    "out_dir": "out/MOLHIV/full/1e-5/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-5,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLHIV/ablation/full/1e-6/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-HIV",

    "out_dir": "out/MOLHIV/full/1e-6/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-6,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLHIV/ablation/full/1e-7/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-HIV",

    "out_dir": "out/MOLHIV/full/1e-7/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-7,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLHIV/ablation/full/1e-8/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-HIV",

    "out_dir": "out/MOLHIV/full/1e-8/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-8,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLHIV/ablation/full/none
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-HIV",

    "out_dir": "out/MOLHIV/full/none/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 64,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-6,
        "m": 10,

        "LPE": "none",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 80,
        "GT_out_dim": 80,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLHIV/ablation/sparse/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-HIV",

    "out_dir": "out/MOLHIV/sparse/node",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 64,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": false,
        "gamma": 1e-5,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 80,
        "GT_out_dim": 80,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLHIV/ablation/sparse/none
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-HIV",

    "out_dir": "out/MOLHIV/sparse/none/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 64,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": false,
        "gamma": 1e-5,
        "m": 10,

        "LPE": "none",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 96,
        "GT_out_dim": 96,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLHIV/optimized
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-HIV",

    "out_dir": "out/MOLHIV/full/1e-6/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 64,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-6,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 10,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.01,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLPCBA/ablation/full/1e-3/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-PCBA",

    "out_dir": "out/MOLPCBA/full/1e-3/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-3,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLPCBA/ablation/full/1e-4/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-PCBA",

    "out_dir": "out/MOLPCBA/full/1e-4/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-4,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLPCBA/ablation/full/1e-5/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-PCBA",

    "out_dir": "out/MOLPCBA/full/1e-5/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-5,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLPCBA/ablation/full/1e-6/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-PCBA",

    "out_dir": "out/MOLPCBA/full/1e-6/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-6,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLPCBA/ablation/full/1e-7/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-PCBA",

    "out_dir": "out/MOLPCBA/full/1e-7/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-7,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLPCBA/ablation/full/1e-8/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-PCBA",

    "out_dir": "out/MOLPCBA/full/1e-8/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 128,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-8,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLPCBA/ablation/full/none
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-PCBA",

    "out_dir": "out/MOLPCBA/full/none/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 64,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-6,
        "m": 10,

        "LPE": "none",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 80,
        "GT_out_dim": 80,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLPCBA/ablation/sparse/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-PCBA",

    "out_dir": "out/MOLPCBA/sparse/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 64,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": false,
        "gamma": 1e-5,
        "m": 10,

        "LPE": "node",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 80,
        "GT_out_dim": 80,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLPCBA/ablation/sparse/none
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-PCBA",

    "out_dir": "out/MOLPCBA/sparse/none/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 64,
        "init_lr": 0.0001,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24
    },

    "net_params": {
        "full_graph": false,
        "gamma": 1e-5,
        "m": 10,

        "LPE": "none",
        "LPE_layers": 2,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 96,
        "GT_out_dim": 96,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.03,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/MOLPCBA/optimized
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },

    "model": "GraphTransformer",
    "dataset": "MOL-PCBA",

    "out_dir": "out/MOLPCBA/full/1e-6/node/",

    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 512,
        "init_lr": 0.0003,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 20,
        "min_lr": 1e-5,
        "weight_decay": 0,
        "print_epoch_interval": 5,
        "max_time": 24,
        "batch_accumulation": 2
    },

    "net_params": {
        "full_graph": true,
        "gamma": 1e-6,
        "m": 10,
        
        "extra_mlp": false,

        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 5,
        "GT_hidden_dim": 304,
        "GT_out_dim": 304,
        "GT_n_heads": 4,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.2,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/full/1e-1/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/full/1e-1/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-1,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/full/1e-2/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/full/1e-2/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-2,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/full/1e-3/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/full/1e-3/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-3,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/full/1e-4/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/full/1e-4/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-4,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/full/1e-5/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/full/1e-5/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-5,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/full/1e-6/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/full/1e-6/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-6,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/full/1e-7/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/full/1e-7/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-7,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/full/1e-8/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/full/1e-8/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-8,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/full/none
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/full/none/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-1,
        "m": 10,
        
        "LPE": "none",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 80,
        "GT_out_dim": 80,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/sparse/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/sparse/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": false,
        "gamma": 1e-2,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/ablation/sparse/none
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/sparse/none/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": false,
        "gamma": 1e-2,
        "m": 10,
        
        "LPE": "none",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 96,
        "GT_out_dim": 96,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/PATTERN/optimized
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "SBM_PATTERN",
    
    "out_dir": "out/SBM_PATTERN/full/1e-2/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 16,
        "init_lr": 0.0005,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 10,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-2,
        "m": 10,
        
        "LPE": "node",
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 4,
        "GT_hidden_dim": 80,
        "GT_out_dim": 80,
        "GT_n_heads": 10,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/ablation/full/1e-2/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/full/1e-2/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 32,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-2,

        "LPE": "node",
        "m": 30,
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/ablation/full/1e-3/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/full/1e-3/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 32,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-3,

        "LPE": "node",
        "m": 30,
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/ablation/full/1e-4/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/full/1e-4/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 32,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-4,

        "LPE": "node",
        "m": 30,
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/ablation/full/1e-5/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/full/1e-5/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 32,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-5,

        "LPE": "node",
        "m": 30,
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/ablation/full/1e-6/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/full/1e-6/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 32,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-6,

        "LPE": "node",
        "m": 30,
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/ablation/full/1e-7/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/full/1e-7/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 32,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-7,

        "LPE": "node",
        "m": 30,
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/ablation/full/1e-8/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/full/1e-8/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 32,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-8,

        "LPE": "node",
        "m": 30,
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 64,
        "GT_out_dim": 64,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/ablation/full/none
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/full/none/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 32,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-5,

        "LPE": "none",
        "m": 30,
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 80,
        "GT_out_dim": 80,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/ablation/sparse/node
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/sparse/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 32,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": false,
        "gamma": 1e-5,

        "LPE": "node",
        "m": 30,
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 72,
        "GT_out_dim": 72,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/ablation/sparse/none
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/sparse/none/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 64,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": false,
        "gamma": 1e-5,

        "LPE": "none",
        "m": 30,
        "LPE_layers": 3,
        "LPE_dim": 16,
        "LPE_n_heads": 4,

        "GT_layers": 6,
        "GT_hidden_dim": 96,
        "GT_out_dim": 96,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "mean",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: configs/ZINC/optimized
================================================
{
    "gpu": {
        "use": true,
        "id": 0
    },
    
    "model": "GraphTransformer",
    "dataset": "ZINC",
    
    "out_dir": "out/ZINC/full/1e-5/node/",
    
    "params": {
        "seed": 41,
        "epochs": 1000,
        "batch_size": 32,
        "init_lr": 0.0007,
        "lr_reduce_factor": 0.5,
        "lr_schedule_patience": 25,
        "min_lr": 1e-6,
        "weight_decay": 0.0,
        "print_epoch_interval": 5,
        "max_time": 24
    },
    
    "net_params": {
        "full_graph": true,
        "gamma": 1e-5,

        "LPE": "node",
        "m": 10,
        "LPE_layers": 2,
        "LPE_dim": 8,
        "LPE_n_heads": 4,

        "GT_layers": 10,
        "GT_hidden_dim": 56,
        "GT_out_dim": 56,
        "GT_n_heads": 8,

        "residual": true,
        "readout": "sum",
        "in_feat_dropout": 0.0,
        "dropout": 0.0,
        "layer_norm": false,
        "batch_norm": true
    }
}


================================================
FILE: data/SBMs.py
================================================

import time
import os
import pickle
import numpy as np

import dgl
import torch
import torch.nn.functional as F

from scipy import sparse as sp
import numpy as np
import networkx as nx

import hashlib


class load_SBMsDataSetDGL(torch.utils.data.Dataset):
    """One split of an SBM dataset, converted to DGL graphs at construction.

    The raw samples are unpickled from ``<data_dir>/<name>_<split>.pkl``;
    each sample contributes one graph to ``graph_lists`` and its
    ``node_label`` to ``node_labels``.
    """

    def __init__(self,
                 data_dir,
                 name,
                 split):
        """
            Unpickle the requested split and build its graphs.
            Parameters
            ---------
            data_dir : str
                Directory that holds the pickled splits.
            name : str
                Dataset name, used as the file-name prefix.
            split : str
                Split identifier, used as the file-name suffix.
        """
        self.split = split
        self.is_test = split.lower() in ['test', 'val']

        pkl_path = os.path.join(data_dir, name + '_%s.pkl' % self.split)
        # NOTE(review): pickle can execute arbitrary code while loading;
        # only point this at trusted dataset files.
        with open(pkl_path, 'rb') as handle:
            self.dataset = pickle.load(handle)

        self.node_labels = []
        self.graph_lists = []
        self.n_samples = len(self.dataset)
        self._prepare()

    def _prepare(self):
        """Turn every raw sample into a DGL graph and record its node labels."""
        print("preparing %d graphs for the %s set..." % (self.n_samples, self.split.upper()))

        for sample in self.dataset:
            feats = sample.node_feat
            # One (src, dst) row per non-zero entry of the adjacency matrix W
            nonzero_pairs = (sample.W != 0).nonzero()

            graph = dgl.DGLGraph()
            graph.add_nodes(feats.size(0))
            graph.ndata['feat'] = feats.long()
            for pair in nonzero_pairs:
                u, v = pair
                graph.add_edges(u.item(), v.item())

            # Every edge gets the same constant, 1-dimensional feature
            graph.edata['feat'] = torch.ones(graph.number_of_edges(), 1)

            self.graph_lists.append(graph)
            self.node_labels.append(sample.node_label)

    def __len__(self):
        """Return the number of graphs in the dataset."""
        return self.n_samples

    def __getitem__(self, idx):
        """
            Get the idx^th sample.
            Parameters
            ---------
            idx : int
                The sample index.
            Returns
            -------
            (dgl.DGLGraph, node labels)
                DGLGraph with node feature stored in `feat` field
                and the `node_label` entry of the same sample.
        """
        graph = self.graph_lists[idx]
        labels = self.node_labels[idx]
        return graph, labels


class SBMsDatasetDGL(torch.utils.data.Dataset):
    """Container for the train / test / val splits of one SBM dataset."""

    def __init__(self, name):
        """
            Build all three splits of `name` from the pickles in data/SBMs
            and print how long the loading took.
        """
        t_begin = time.time()
        print("[I] Loading data ...")
        self.name = name
        data_dir = 'data/SBMs'
        # Splits are loaded in this exact order: train, test, val
        for split in ('train', 'test', 'val'):
            setattr(self, split, load_SBMsDataSetDGL(data_dir, name, split=split))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time() - t_begin))


def laplace_decomp(g, max_freqs):
    """
        Store Laplacian eigenvectors and eigenvalues on `g` as node features.
        Parameters
        ---------
        g : dgl.DGLGraph
            Graph to decompose; it is modified in place.
        max_freqs : int
            Number of eigenpairs to keep. When the graph has fewer nodes,
            the missing entries are filled with NaN.
        Returns
        -------
        dgl.DGLGraph
            The same graph with `EigVecs` and `EigVals` set in `ndata`.
    """
    num_nodes = g.number_of_nodes()

    # Normalized Laplacian L = I - N A N with N = diag(in_degree clipped to >= 1) ** -0.5
    adj = g.adjacency_matrix_scipy(return_edge_ids=False).astype(float)
    inv_sqrt_deg = sp.diags(dgl.backend.asnumpy(g.in_degrees()).clip(1) ** -0.5, dtype=float)
    laplacian = sp.eye(num_nodes) - inv_sqrt_deg * adj * inv_sqrt_deg

    # Dense symmetric eigendecomposition; keep at most max_freqs eigenpairs
    vals, vecs = np.linalg.eigh(laplacian.toarray())
    vals = vals[:max_freqs]
    vecs = vecs[:, :max_freqs]

    # dim=1: every row (one per node) is scaled to unit L2 norm
    vecs = F.normalize(torch.from_numpy(vecs).float(), p=2, dim=1, eps=1e-12, out=None)

    # Abs value is taken because numpy sometimes computes the first eigenvalue approaching 0 from the negative
    vals = torch.from_numpy(np.sort(np.abs(np.real(vals))))

    # Pad the frequency axis with NaN when the graph is smaller than max_freqs
    pad_width = max_freqs - num_nodes
    if pad_width > 0:
        vecs = F.pad(vecs, (0, pad_width), value=float('nan'))
        vals = F.pad(vals, (0, pad_width), value=float('nan'))

    g.ndata['EigVecs'] = vecs
    # Every node receives its own copy of the eigenvalue row
    g.ndata['EigVals'] = vals.unsqueeze(0).repeat(num_nodes, 1).unsqueeze(2)

    return g


def make_full_graph(g):
    """
        Build the fully-connected counterpart of `g`.
        Node features are copied over, as are the Laplacian encodings when
        they exist. Every edge of the complete graph starts with `feat` = 0
        and `real` = 0; edges that also exist in `g` get `feat` = 1 and
        `real` = 1.
        Parameters
        ---------
        g : dgl.DGLGraph
            Original (sparse) graph with `feat` node and edge data.
        Returns
        -------
        dgl.DGLGraph
            New complete graph on the same number of nodes.
    """
    full_g = dgl.from_networkx(nx.complete_graph(g.number_of_nodes()))

    # Here we copy over the node feature data and laplace encodings
    full_g.ndata['feat'] = g.ndata['feat']

    try:
        full_g.ndata['EigVecs'] = g.ndata['EigVecs']
        full_g.ndata['EigVals'] = g.ndata['EigVals']
    except KeyError:
        # The encodings are optional: graphs that never went through
        # laplace_decomp simply do not carry them. Any other failure is a
        # real error and must not be hidden.
        pass

    # Populate edge features w/ 0s
    num_full_edges = full_g.number_of_edges()
    full_g.edata['feat'] = torch.zeros(num_full_edges, dtype=torch.long)
    full_g.edata['real'] = torch.zeros(num_full_edges, dtype=torch.long)

    # Mark the edges that exist in the original graph (endpoints looked up once)
    src, dst = g.edges(form='uv')
    src, dst = src.tolist(), dst.tolist()
    num_real_edges = g.edata['feat'].shape[0]
    full_g.edges[src, dst].data['feat'] = torch.ones(num_real_edges, dtype=torch.long)
    full_g.edges[src, dst].data['real'] = torch.ones(num_real_edges, dtype=torch.long)

    return full_g


def add_edge_laplace_feats(g):
    """
        Move the Laplacian encodings of `g` from its nodes onto its edges.
        For every edge the element-wise absolute difference (`diff`) and
        product (`product`) of the endpoint eigenvectors are stored, together
        with a copy of the eigenvalues (`EigVals`). The node-level `EigVecs`
        and `EigVals` entries are deleted afterwards.
        Parameters
        ---------
        g : dgl.DGLGraph
            Graph carrying `EigVecs` and `EigVals` in `ndata`; modified in place.
        Returns
        -------
        dgl.DGLGraph
            The same graph.
    """
    # Eigenvalues are read from node 0 only
    eigvals_row = g.ndata['EigVals'][0].flatten()

    src, dst = g.find_edges(g.edges(form='eid'))
    src_vecs = g.nodes[src].data['EigVecs']
    dst_vecs = g.nodes[dst].data['EigVecs']

    # Compute diffusion distances and Green function
    g.edata['diff'] = torch.abs(src_vecs - dst_vecs).unsqueeze(2)
    g.edata['product'] = torch.mul(src_vecs, dst_vecs).unsqueeze(2)
    g.edata['EigVals'] = eigvals_row.repeat(g.number_of_edges(), 1).unsqueeze(2)

    # No longer need EigVecs and EigVals stored as node features
    for key in ('EigVecs', 'EigVals'):
        del g.ndata[key]

    return g


class SBMsDataset(torch.utils.data.Dataset):
    """Pre-processed SBM dataset read from a single pickle file."""

    def __init__(self, name):
        """
            Load `data/SBMs/<name>.pkl`; its first three entries become the
            train, val and test splits (in that order).
        """
        t_begin = time.time()
        print("[I] Loading dataset %s..." % (name))
        self.name = name
        data_dir = 'data/SBMs/'
        # NOTE(review): pickle can execute arbitrary code while loading;
        # only use trusted dataset files.
        with open(data_dir+name+'.pkl',"rb") as handle:
            splits = pickle.load(handle)
            self.train = splits[0]
            self.val = splits[1]
            self.test = splits[2]

        print('train, test, val sizes :',len(self.train),len(self.test),len(self.val))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time() - t_begin))

    def collate(self, samples):
        """Merge (graph, labels) samples into one batched graph and one long label tensor."""
        graphs, labels = (list(column) for column in zip(*samples))
        labels = torch.cat(labels).long()
        return dgl.batch(graphs), labels

    def _laplace_decomp(self, max_freqs):
        """Run laplace_decomp on every graph of the train, val and test splits."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [laplace_decomp(g, max_freqs) for g in split.graph_lists]

    def _make_full_graph(self):
        """Replace every graph of the three splits by its make_full_graph result."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [make_full_graph(g) for g in split.graph_lists]

    def _add_edge_laplace_feats(self):
        """Run add_edge_laplace_feats on every graph of the three splits."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [add_edge_laplace_feats(g) for g in split.graph_lists]

================================================
FILE: data/data.py
================================================
"""
    File to load dataset based on user control from main file
"""
from data.molecules import MoleculeDataset
from data.SBMs import SBMsDataset
from data.molhiv import MolHIVDataset
from data.molpcba import MolPCBADataset

def LoadData(DATASET_NAME):
    """
        This function is called in the main_xx.py file
        parameters:
        ; DATASET_NAME : one of 'ZINC', 'SBM_CLUSTER', 'SBM_PATTERN',
                         'MOL-HIV', 'MOL-PCBA'
        returns:
        ; dataset object
        raises:
        ; ValueError when DATASET_NAME is not one of the names above
          (previously the function fell through and returned None)
    """

    # handling for (ZINC) molecule dataset
    if DATASET_NAME == 'ZINC':
        return MoleculeDataset(DATASET_NAME)

    # handling for SBM datasets
    SBM_DATASETS = ['SBM_CLUSTER', 'SBM_PATTERN']
    if DATASET_NAME in SBM_DATASETS:
        return SBMsDataset(DATASET_NAME)

    # handling for the OGB molecule datasets
    if DATASET_NAME == 'MOL-HIV':
        return MolHIVDataset(DATASET_NAME)

    if DATASET_NAME == 'MOL-PCBA':
        return MolPCBADataset(DATASET_NAME)

    # Fail here with a clear message instead of handing None to the caller
    raise ValueError(
        "Unknown dataset name %r; expected one of %s"
        % (DATASET_NAME, ['ZINC'] + SBM_DATASETS + ['MOL-HIV', 'MOL-PCBA'])
    )
    

================================================
FILE: data/molecules.py
================================================
import torch
import pickle
import torch.utils.data
import time
import os
import numpy as np

import csv

import dgl
import torch.nn.functional as F


from scipy import sparse as sp
import numpy as np
import networkx as nx
import hashlib


class MoleculeDGL(torch.utils.data.Dataset):
    """One split of the molecule data, converted to DGL graphs at construction.

    The split is unpickled from ``<data_dir>/<split>.pickle`` as a list of
    molecule dicts with the following entries:

      molecule = data[idx]
    ; molecule['num_atom'] : nb of atoms, an integer (N)
    ; molecule['atom_type'] : tensor of size N, each element is an atom type, an integer between 0 and num_atom_type
    ; molecule['bond_type'] : tensor of size N x N, each element is a bond type, an integer between 0 and num_bond_type
    ; molecule['logP_SA_cycle_normalized'] : the chemical property to regress, a float variable
    """

    def __init__(self, data_dir, split, num_graphs=None):
        """
            Load a split and build its graphs.
            Parameters
            ---------
            data_dir : str
                Directory holding `<split>.pickle` (and `<split>.index`).
            split : str
                Split name used to build the file names.
            num_graphs : int, optional
                When 10000 or 1000, the molecules are sub-sampled with the
                indices stored in `<split>.index` and the resulting count
                must equal `num_graphs`.
        """
        self.data_dir = data_dir
        self.split = split
        self.num_graphs = num_graphs

        # NOTE(review): pickle can execute arbitrary code while loading;
        # only use trusted dataset files.
        with open(data_dir + "/%s.pickle" % self.split,"rb") as f:
            self.data = pickle.load(f)

        if self.num_graphs in [10000, 1000]:
            # loading the sampled indices from file ./zinc_molecules/<split>.index
            with open(data_dir + "/%s.index" % self.split,"r") as f:
                data_idx = [list(map(int, idx)) for idx in csv.reader(f)]
                # only the first row of the index file is used
                self.data = [ self.data[i] for i in data_idx[0] ]

            assert len(self.data)==num_graphs, "Sample num_graphs again; available idx: train/val/test => 10k/1k/1k"

        self.graph_lists = []
        self.graph_labels = []
        self.n_samples = len(self.data)
        self._prepare()

    def _prepare(self):
        """Convert every molecule dict into a DGL graph and record its label."""
        # Report the real number of molecules: num_graphs defaults to None,
        # which "%d" cannot format (TypeError).
        print("preparing %d graphs for the %s set..." % (self.n_samples, self.split.upper()))

        for molecule in self.data:
            node_features = molecule['atom_type'].long()

            adj = molecule['bond_type']
            edge_list = (adj != 0).nonzero()  # converting adj matrix to edge_list

            # Bond type of every listed edge, read back from the adjacency matrix
            edge_idxs_in_adj = edge_list.split(1, dim=1)
            edge_features = adj[edge_idxs_in_adj].reshape(-1).long()

            # Create the DGL Graph
            g = dgl.DGLGraph()
            g.add_nodes(molecule['num_atom'])
            g.ndata['feat'] = node_features

            for src, dst in edge_list:
                g.add_edges(src.item(), dst.item())
            g.edata['feat'] = edge_features

            self.graph_lists.append(g)
            self.graph_labels.append(molecule['logP_SA_cycle_normalized'])

    def __len__(self):
        """Return the number of graphs in the dataset."""
        return self.n_samples

    def __getitem__(self, idx):
        """
            Get the idx^th sample.
            Parameters
            ---------
            idx : int
                The sample index.
            Returns
            -------
            (dgl.DGLGraph, label)
                DGLGraph with node feature stored in `feat` field
                and the molecule's `logP_SA_cycle_normalized` value.
        """
        return self.graph_lists[idx], self.graph_labels[idx]
    
    
class MoleculeDatasetDGL(torch.utils.data.Dataset):
    """Train / val / test splits of the ZINC molecules built from the raw pickles."""

    def __init__(self, name='Zinc'):
        """
            Build the three splits and print the time taken.
            `name` == 'ZINC-full' reads ./data/molecules/zinc_full with the
            full split sizes; any other name reads ./data/molecules with the
            10k / 1k / 1k subset.
        """
        t0 = time.time()
        self.name = name

        self.num_atom_type = 28 # known meta-info about the zinc dataset; can be calculated as well
        self.num_bond_type = 4 # known meta-info about the zinc dataset; can be calculated as well

        if self.name == 'ZINC-full':
            data_dir = './data/molecules/zinc_full'
            n_train, n_val, n_test = 220011, 24445, 5000
        else:
            data_dir = './data/molecules'
            n_train, n_val, n_test = 10000, 1000, 1000

        self.train = MoleculeDGL(data_dir, 'train', num_graphs=n_train)
        self.val = MoleculeDGL(data_dir, 'val', num_graphs=n_val)
        self.test = MoleculeDGL(data_dir, 'test', num_graphs=n_test)
        print("Time taken: {:.4f}s".format(time.time() - t0))


def laplace_decomp(g, max_freqs):
    """
        Store Laplacian eigenvectors and eigenvalues on `g` as node features.
        Parameters
        ---------
        g : dgl.DGLGraph
            Graph to decompose; it is modified in place.
        max_freqs : int
            Number of eigenpairs to keep. When the graph has fewer nodes,
            the missing entries are filled with NaN.
        Returns
        -------
        dgl.DGLGraph
            The same graph with `EigVecs` and `EigVals` set in `ndata`.
    """
    num_nodes = g.number_of_nodes()

    # Normalized Laplacian L = I - N A N with N = diag(in_degree clipped to >= 1) ** -0.5
    adj = g.adjacency_matrix_scipy(return_edge_ids=False).astype(float)
    inv_sqrt_deg = sp.diags(dgl.backend.asnumpy(g.in_degrees()).clip(1) ** -0.5, dtype=float)
    laplacian = sp.eye(num_nodes) - inv_sqrt_deg * adj * inv_sqrt_deg

    # Dense symmetric eigendecomposition; keep at most max_freqs eigenpairs
    vals, vecs = np.linalg.eigh(laplacian.toarray())
    vals = vals[:max_freqs]
    vecs = vecs[:, :max_freqs]

    # dim=1: every row (one per node) is scaled to unit L2 norm
    vecs = F.normalize(torch.from_numpy(vecs).float(), p=2, dim=1, eps=1e-12, out=None)

    # Abs value is taken because numpy sometimes computes the first eigenvalue approaching 0 from the negative
    vals = torch.from_numpy(np.sort(np.abs(np.real(vals))))

    # Pad the frequency axis with NaN when the graph is smaller than max_freqs
    pad_width = max_freqs - num_nodes
    if pad_width > 0:
        vecs = F.pad(vecs, (0, pad_width), value=float('nan'))
        vals = F.pad(vals, (0, pad_width), value=float('nan'))

    g.ndata['EigVecs'] = vecs
    # Every node receives its own copy of the eigenvalue row
    g.ndata['EigVals'] = vals.unsqueeze(0).repeat(num_nodes, 1).unsqueeze(2)

    return g


def make_full_graph(g):
    """
        Build the fully-connected counterpart of `g`.
        Node features are copied over, as are the Laplacian encodings when
        they exist. Every edge of the complete graph starts with `feat` = 0
        and `real` = 0; edges that also exist in `g` receive their original
        `feat` value and `real` = 1.
        Parameters
        ---------
        g : dgl.DGLGraph
            Original (sparse) graph with `feat` node and edge data.
        Returns
        -------
        dgl.DGLGraph
            New complete graph on the same number of nodes.
    """
    full_g = dgl.from_networkx(nx.complete_graph(g.number_of_nodes()))

    # Here we copy over the node feature data and laplace encodings
    full_g.ndata['feat'] = g.ndata['feat']

    try:
        full_g.ndata['EigVecs'] = g.ndata['EigVecs']
        full_g.ndata['EigVals'] = g.ndata['EigVals']
    except KeyError:
        # The encodings are optional: graphs that never went through
        # laplace_decomp simply do not carry them. Any other failure is a
        # real error and must not be hidden.
        pass

    # Populate edge features w/ 0s
    num_full_edges = full_g.number_of_edges()
    full_g.edata['feat'] = torch.zeros(num_full_edges, dtype=torch.long)
    full_g.edata['real'] = torch.zeros(num_full_edges, dtype=torch.long)

    # Copy real edge data over (endpoints looked up once)
    src, dst = g.edges(form='uv')
    src, dst = src.tolist(), dst.tolist()
    full_g.edges[src, dst].data['feat'] = g.edata['feat']
    full_g.edges[src, dst].data['real'] = torch.ones(g.edata['feat'].shape[0], dtype=torch.long)

    return full_g


def add_edge_laplace_feats(g):
    """
        Move the Laplacian encodings of `g` from its nodes onto its edges.
        For every edge the element-wise absolute difference (`diff`) and
        product (`product`) of the endpoint eigenvectors are stored, together
        with a copy of the eigenvalues (`EigVals`). The node-level `EigVecs`
        and `EigVals` entries are deleted afterwards.
        Parameters
        ---------
        g : dgl.DGLGraph
            Graph carrying `EigVecs` and `EigVals` in `ndata`; modified in place.
        Returns
        -------
        dgl.DGLGraph
            The same graph.
    """
    # Eigenvalues are read from node 0 only
    eigvals_row = g.ndata['EigVals'][0].flatten()

    src, dst = g.find_edges(g.edges(form='eid'))
    src_vecs = g.nodes[src].data['EigVecs']
    dst_vecs = g.nodes[dst].data['EigVecs']

    # Compute diffusion distances and Green function
    g.edata['diff'] = torch.abs(src_vecs - dst_vecs).unsqueeze(2)
    g.edata['product'] = torch.mul(src_vecs, dst_vecs).unsqueeze(2)
    g.edata['EigVals'] = eigvals_row.repeat(g.number_of_edges(), 1).unsqueeze(2)

    # No longer need EigVecs and EigVals stored as node features
    for key in ('EigVecs', 'EigVals'):
        del g.ndata[key]

    return g


class MoleculeDataset(torch.utils.data.Dataset):
    """Pre-processed molecule dataset read from a single pickle file."""

    def __init__(self, name):
        """
            Load `data/molecules/<name>.pkl`; its entries 0-4 become the
            train, val and test splits followed by the number of atom types
            and the number of bond types.
        """
        t_begin = time.time()
        print("[I] Loading dataset %s..." % (name))
        self.name = name
        data_dir = 'data/molecules/'
        # NOTE(review): pickle can execute arbitrary code while loading;
        # only use trusted dataset files.
        with open(data_dir+name+'.pkl',"rb") as handle:
            payload = pickle.load(handle)
            self.train = payload[0]
            self.val = payload[1]
            self.test = payload[2]
            self.num_atom_type = payload[3]
            self.num_bond_type = payload[4]
        print('train, test, val sizes :',len(self.train),len(self.test),len(self.val))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time() - t_begin))

    def collate(self, samples):
        """Merge (graph, label) samples into one batched graph and a label tensor with a trailing axis of size 1."""
        graphs, labels = (list(column) for column in zip(*samples))
        labels = torch.tensor(np.array(labels)).unsqueeze(1)
        return dgl.batch(graphs), labels

    def _laplace_decomp(self, max_freqs):
        """Run laplace_decomp on every graph of the train, val and test splits."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [laplace_decomp(g, max_freqs) for g in split.graph_lists]

    def _make_full_graph(self):
        """Replace every graph of the three splits by its make_full_graph result."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [make_full_graph(g) for g in split.graph_lists]

    def _add_edge_laplace_feats(self):
        """Run add_edge_laplace_feats on every graph of the three splits."""
        for split in (self.train, self.val, self.test):
            split.graph_lists = [add_edge_laplace_feats(g) for g in split.graph_lists]


================================================
FILE: data/molhiv.py
================================================
import torch
import pickle
import torch.utils.data
import time
import os
import numpy as np

import csv

import dgl
import torch.nn.functional as F

from scipy import sparse as sp
import numpy as np
import networkx as nx
import hashlib


def laplace_decomp(graph, max_freqs):
    """
    Store Laplacian eigenvectors and eigenvalues on a graph as node features.

    Parameters
    ----------
    graph : (dgl.DGLGraph, label)
        Graph/label pair; the graph is modified in place, the label is
        passed through untouched.
    max_freqs : int
        Number of eigenpairs to keep. When the graph has fewer nodes, the
        missing entries are filled with NaN.

    Returns
    -------
    (dgl.DGLGraph, label)
        The same pair, the graph now carrying `EigVecs` and `EigVals`.
    """
    g, label = graph
    num_nodes = g.number_of_nodes()

    # Normalized Laplacian L = I - N A N with N = diag(in_degree clipped to >= 1) ** -0.5
    adj = g.adjacency_matrix_scipy(return_edge_ids=False).astype(float)
    inv_sqrt_deg = sp.diags(dgl.backend.asnumpy(g.in_degrees()).clip(1) ** -0.5, dtype=float)
    laplacian = sp.eye(num_nodes) - inv_sqrt_deg * adj * inv_sqrt_deg

    # Dense symmetric eigendecomposition; keep at most max_freqs eigenpairs
    vals, vecs = np.linalg.eigh(laplacian.toarray())
    vals = vals[:max_freqs]
    vecs = vecs[:, :max_freqs]

    # dim=1: every row (one per node) is scaled to unit L2 norm
    vecs = F.normalize(torch.from_numpy(vecs).float(), p=2, dim=1, eps=1e-12, out=None)

    # Abs value is taken because numpy sometimes computes the first eigenvalue approaching 0 from the negative
    vals = torch.from_numpy(np.sort(np.abs(np.real(vals))))

    # Pad the frequency axis with NaN when the graph is smaller than max_freqs
    pad_width = max_freqs - num_nodes
    if pad_width > 0:
        vecs = F.pad(vecs, (0, pad_width), value=float('nan'))
        vals = F.pad(vals, (0, pad_width), value=float('nan'))

    g.ndata['EigVecs'] = vecs
    # Every node receives its own copy of the eigenvalue row
    g.ndata['EigVals'] = vals.unsqueeze(0).repeat(num_nodes, 1).unsqueeze(2)

    return g, label


def make_full_graph(graph):
    """
    Build the fully-connected counterpart of a graph.

    Node features are copied over, as are the Laplacian encodings when they
    exist. Every edge of the complete graph starts with an all-zero 3-column
    `feat` row and `real` = 0; edges that also exist in the original graph
    receive their original `feat` row and `real` = 1.

    Parameters
    ----------
    graph : (dgl.DGLGraph, label)
        Graph/label pair; the label is passed through untouched.

    Returns
    -------
    (dgl.DGLGraph, label)
        New complete graph on the same number of nodes, and the label.
    """
    g, label = graph

    full_g = dgl.from_networkx(nx.complete_graph(g.number_of_nodes()))

    # Copy over the node feature data and laplace eigvals/eigvecs
    full_g.ndata['feat'] = g.ndata['feat']

    try:
        full_g.ndata['EigVecs'] = g.ndata['EigVecs']
        full_g.ndata['EigVals'] = g.ndata['EigVals']
    except KeyError:
        # The encodings are optional: graphs that never went through
        # laplace_decomp simply do not carry them. Any other failure is a
        # real error and must not be hidden.
        pass

    # Initialize fake edge features w/ 0s (edge features have 3 columns here)
    num_full_edges = full_g.number_of_edges()
    full_g.edata['feat'] = torch.zeros(num_full_edges, 3, dtype=torch.long)
    full_g.edata['real'] = torch.zeros(num_full_edges, dtype=torch.long)

    # Copy real edge data over, and identify real edges! (endpoints looked up once)
    src, dst = g.edges(form='uv')
    src, dst = src.tolist(), dst.tolist()
    full_g.edges[src, dst].data['feat'] = g.edata['feat']
    full_g.edges[src, dst].data['real'] = torch.ones(
        g.edata['feat'].shape[0], dtype=torch.long)  # This indicates real edges

    return full_g, label


def add_edge_laplace_feats(graph):
    """
    Move the Laplacian encodings of a graph from its nodes onto its edges.

    For every edge the element-wise absolute difference (`diff`) and product
    (`product`) of the endpoint eigenvectors are stored, together with a copy
    of the eigenvalues (`EigVals`). The node-level `EigVecs` and `EigVals`
    entries are deleted afterwards.

    Parameters
    ----------
    graph : (dgl.DGLGraph, label)
        Graph/label pair; the graph is modified in place.

    Returns
    -------
    (dgl.DGLGraph, label)
        The same pair.
    """
    g, label = graph

    # Eigenvalues are read from node 0 only
    eigvals_row = g.ndata['EigVals'][0].flatten()

    src, dst = g.find_edges(g.edges(form='eid'))
    src_vecs = g.nodes[src].data['EigVecs']
    dst_vecs = g.nodes[dst].data['EigVecs']

    # Compute diffusion differences and Green function
    g.edata['diff'] = torch.abs(src_vecs - dst_vecs).unsqueeze(2)
    g.edata['product'] = torch.mul(src_vecs, dst_vecs).unsqueeze(2)
    g.edata['EigVals'] = eigvals_row.repeat(g.number_of_edges(), 1).unsqueeze(2)

    # No longer need EigVecs and EigVals stored as node features
    for key in ('EigVecs', 'EigVals'):
        del g.ndata[key]

    return g, label


from ogb.graphproppred import DglGraphPropPredDataset, collate_dgl


class MolHIVDataset(torch.utils.data.Dataset):
    """The OGB `ogbg-molhiv` dataset, split into train / val / test."""

    def __init__(self, name):
        """
            Loading the ogbg-molhiv dataset with the split indices that the
            OGB dataset object provides
        """
        t_begin = time.time()
        print("[I] Loading dataset %s..." % (name))
        self.name = name

        ogb_dataset = DglGraphPropPredDataset(name='ogbg-molhiv')
        split_idx = ogb_dataset.get_idx_split()

        # OGB calls the validation split "valid"; it is exposed here as `val`
        self.train = ogb_dataset[split_idx["train"]]
        self.val = ogb_dataset[split_idx["valid"]]
        self.test = ogb_dataset[split_idx["test"]]

        print('train, test, val sizes :', len(self.train), len(self.test), len(self.val))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time() - t_begin))

    def collate(self, samples):
        """Merge (graph, label) samples into one batched graph and a stacked label tensor."""
        graphs, labels = (list(column) for column in zip(*samples))
        batched_graph = dgl.batch(graphs)
        return batched_graph, torch.stack(labels)

    def _laplace_decomp(self, max_freqs):
        """Replace each split by a list of laplace_decomp results, one per (graph, label) pair."""
        for split in ('train', 'val', 'test'):
            setattr(self, split, [laplace_decomp(pair, max_freqs) for pair in getattr(self, split)])

    def _make_full_graph(self):
        """Replace each split by a list of make_full_graph results."""
        for split in ('train', 'val', 'test'):
            setattr(self, split, [make_full_graph(pair) for pair in getattr(self, split)])

    def _add_edge_laplace_feats(self):
        """Replace each split by a list of add_edge_laplace_feats results."""
        for split in ('train', 'val', 'test'):
            setattr(self, split, [add_edge_laplace_feats(pair) for pair in getattr(self, split)])


================================================
FILE: data/molpcba.py
================================================
import torch
import pickle
import torch.utils.data
import time
import os
import numpy as np

import csv

import dgl
import torch.nn.functional as F

from scipy import sparse as sp
import numpy as np
import networkx as nx
import hashlib

from tqdm.std import tqdm


def laplace_decomp(graph, max_freqs):
    """
    Store Laplacian eigenvectors and eigenvalues on a graph as node features.

    Unlike a symmetric solver, `np.linalg.eig` gives no ordering guarantee,
    so the eigenpairs are sorted explicitly by eigenvalue before truncation.

    Parameters
    ----------
    graph : (dgl.DGLGraph, label)
        Graph/label pair; the graph is modified in place, the label is
        passed through untouched.
    max_freqs : int
        Number of eigenpairs to keep. When the graph has fewer nodes, the
        missing entries are filled with NaN.

    Returns
    -------
    (dgl.DGLGraph, label)
        The same pair, the graph now carrying `EigVecs` and `EigVals`.
    """
    g, label = graph
    num_nodes = g.number_of_nodes()

    # Normalized Laplacian L = I - N A N with N = diag(in_degree clipped to >= 1) ** -0.5
    adj = g.adjacency_matrix_scipy(return_edge_ids=False).astype(float)
    inv_sqrt_deg = sp.diags(dgl.backend.asnumpy(g.in_degrees()).clip(1) ** -0.5, dtype=float)
    laplacian = sp.eye(num_nodes) - inv_sqrt_deg * adj * inv_sqrt_deg

    raw_vals, raw_vecs = np.linalg.eig(laplacian.toarray())
    # Keep up to the maximum desired number of frequencies (smallest eigenvalues first)
    keep = raw_vals.argsort()[0:max_freqs]
    vals = raw_vals[keep]
    vecs = np.real(raw_vecs[:, keep])  # only the real part of the eigenvectors is used

    # Order the kept eigenvectors by their (kept) eigenvalues, increasing
    vecs = vecs[:, vals.argsort()]

    # dim=1: every row (one per node) is scaled to unit L2 norm
    vecs = F.normalize(torch.from_numpy(vecs).float(), p=2, dim=1, eps=1e-12, out=None)

    # Abs value is taken because numpy sometimes computes the first eigenvalue approaching 0 from the negative
    vals = torch.from_numpy(np.sort(np.abs(np.real(vals))))

    # Pad the frequency axis with NaN when the graph is smaller than max_freqs
    pad_width = max_freqs - num_nodes
    if pad_width > 0:
        vecs = F.pad(vecs, (0, pad_width), value=float('nan'))
        vals = F.pad(vals, (0, pad_width), value=float('nan'))

    g.ndata['EigVecs'] = vecs
    # Every node receives its own copy of the eigenvalue row
    g.ndata['EigVals'] = vals.unsqueeze(0).repeat(num_nodes, 1).unsqueeze(2)

    return g, label


def make_full_graph(graph):
    """
    Build the fully-connected counterpart of a graph.

    Node features are copied over, as are the Laplacian encodings when they
    exist. Every edge of the complete graph starts with an all-zero 3-column
    `feat` row and `real` = 0; edges that also exist in the original graph
    receive their original `feat` row and `real` = 1.

    Parameters
    ----------
    graph : (dgl.DGLGraph, label)
        Graph/label pair; the label is passed through untouched.

    Returns
    -------
    (dgl.DGLGraph, label)
        New complete graph on the same number of nodes, and the label.
    """
    g, label = graph

    full_g = dgl.from_networkx(nx.complete_graph(g.number_of_nodes()))

    # Copy over the node feature data and laplace eigvals/eigvecs
    full_g.ndata['feat'] = g.ndata['feat']

    try:
        full_g.ndata['EigVecs'] = g.ndata['EigVecs']
        full_g.ndata['EigVals'] = g.ndata['EigVals']
    except KeyError:
        # The encodings are optional: graphs that never went through
        # laplace_decomp simply do not carry them. Any other failure is a
        # real error and must not be hidden.
        pass

    # Initialize fake edge features w/ 0s (edge features have 3 columns here)
    num_full_edges = full_g.number_of_edges()
    full_g.edata['feat'] = torch.zeros(num_full_edges, 3, dtype=torch.long)
    full_g.edata['real'] = torch.zeros(num_full_edges, dtype=torch.long)

    # Copy real edge data over, and identify real edges! (endpoints looked up once)
    src, dst = g.edges(form='uv')
    src, dst = src.tolist(), dst.tolist()
    full_g.edges[src, dst].data['feat'] = g.edata['feat']
    full_g.edges[src, dst].data['real'] = torch.ones(
        g.edata['feat'].shape[0], dtype=torch.long)  # This indicates real edges

    return full_g, label


def add_edge_laplace_feats(graph):
    """Turn node-level Laplacian encodings into per-edge features.

    For every edge, the absolute difference and the element-wise product of
    the endpoint ``'EigVecs'`` rows are stored in ``g.edata['diff']`` and
    ``g.edata['product']``. The eigenvalue row read from node 0 is repeated
    for every edge in ``g.edata['EigVals']``. The node-level ``'EigVecs'``
    and ``'EigVals'`` entries are deleted afterwards.

    Args:
        graph: a ``(g, label)`` pair whose ``g.ndata`` holds ``'EigVecs'``
            and ``'EigVals'``.

    Returns:
        The same ``(g, label)`` pair, with ``g`` modified in place.
    """
    g, label = graph

    # Row 0 of the node-level eigenvalues is the one replicated onto edges.
    eigvals_row = g.ndata['EigVals'][0].flatten()

    src_nodes, dst_nodes = g.find_edges(g.edges(form='eid'))
    src_vecs = g.nodes[src_nodes].data['EigVecs']
    dst_vecs = g.nodes[dst_nodes].data['EigVecs']

    # Compute diffusion differences and Green function
    g.edata['diff'] = (src_vecs - dst_vecs).abs().unsqueeze(2)
    g.edata['product'] = (src_vecs * dst_vecs).unsqueeze(2)
    g.edata['EigVals'] = eigvals_row.repeat(g.number_of_edges(), 1).unsqueeze(2)

    # No longer need EigVecs and EigVals stored as node features
    for node_key in ('EigVecs', 'EigVals'):
        del g.ndata[node_key]

    return g, label


from ogb.graphproppred import DglGraphPropPredDataset, collate_dgl


class MolPCBADataset(torch.utils.data.Dataset):
    """Train/val/test splits of the OGB ``ogbg-molpcba`` dataset.

    After construction ``self.train``, ``self.val`` and ``self.test`` hold the
    three splits. The ``_laplace_decomp``, ``_make_full_graph`` and
    ``_add_edge_laplace_feats`` methods replace each split with a list of
    transformed ``(graph, label)`` samples.
    """

    def __init__(self, name):
        """
            Loading PCBA dataset

            ``name`` is only stored and logged; the dataset that is actually
            loaded is always 'ogbg-molpcba'.
        """
        start = time.time()
        print("[I] Loading dataset %s..." % (name))
        self.name = name

        dataset = DglGraphPropPredDataset(name='ogbg-molpcba')
        split_idx = dataset.get_idx_split()

        self.train = dataset[split_idx["train"]]
        self.val = dataset[split_idx["valid"]]
        self.test = dataset[split_idx["test"]]

        print('train, test, val sizes :', len(self.train), len(self.test), len(self.val))
        print("[I] Finished loading.")
        print("[I] Data load time: {:.4f}s".format(time.time() - start))

    def collate(self, samples):
        """Batch a list of ``(graph, label)`` samples into one graph and a stacked label tensor."""
        graphs, labels = map(list, zip(*samples))
        batched_graph = dgl.batch(graphs)
        labels = torch.stack(labels)

        return batched_graph, labels

    def _apply_to_splits(self, transform):
        """Replace every split by the list of ``transform(sample)`` results (train, val, test order)."""
        self.train = [transform(graph) for graph in tqdm(self.train)]
        self.val = [transform(graph) for graph in tqdm(self.val)]
        self.test = [transform(graph) for graph in tqdm(self.test)]

    def _laplace_decomp(self, max_freqs):
        """Apply ``laplace_decomp(sample, max_freqs)`` to every sample of every split."""
        self._apply_to_splits(lambda graph: laplace_decomp(graph, max_freqs))

    def _make_full_graph(self):
        """Apply ``make_full_graph`` to every sample of every split."""
        self._apply_to_splits(make_full_graph)

    def _add_edge_laplace_feats(self):
        """Apply ``add_edge_laplace_feats`` to every sample of every split."""
        self._apply_to_splits(add_edge_laplace_feats)


================================================
FILE: data/script_download_SBMs.sh
================================================


# Command to download dataset:
#   bash script_download_SBMs.sh


# -p: re-running the script must not fail because the directory already exists.
mkdir -p SBMs
# Abort instead of downloading into the wrong directory if cd fails.
cd SBMs || exit 1


FILE=SBM_CLUSTER.pkl
if test -f "$FILE"; then
    echo -e "$FILE already downloaded."
else
    echo -e "\ndownloading $FILE..."
    # -f: on an HTTP error, fail instead of saving the error page as the .pkl
    #     (which a later run would treat as "already downloaded").
    # NOTE(security): -k disables TLS certificate verification; kept as in the
    # original invocation, drop it if the host certificate validates for you.
    curl -f https://data.dgl.ai/dataset/benchmarking-gnns/SBM_CLUSTER.pkl -o SBM_CLUSTER.pkl -J -L -k
fi


FILE=SBM_PATTERN.pkl
if test -f "$FILE"; then
    echo -e "$FILE already downloaded."
else
    echo -e "\ndownloading $FILE..."
    # Same flags as above (-f added, -k kept; see note there).
    curl -f https://data.dgl.ai/dataset/benchmarking-gnns/SBM_PATTERN.pkl -o SBM_PATTERN.pkl -J -L -k
fi


================================================
FILE: data/script_download_all_datasets.sh
================================================


# Command to download dataset:
#   bash script_download_all_datasets.sh


############
# ZINC
############

# -p: re-running the script must not fail because the directory already exists.
mkdir -p molecules
# Abort instead of downloading into the wrong directory if cd fails.
cd molecules || exit 1

FILE=ZINC.pkl
if test -f "$FILE"; then
    echo -e "$FILE already downloaded."
else
    echo -e "\ndownloading $FILE..."
    # -f: on an HTTP error, fail instead of saving the error page as the .pkl
    #     (which a later run would treat as "already downloaded").
    # NOTE(security): -k disables TLS certificate verification; kept as in the
    # original invocation, drop it if the host certificate validates for you.
    curl -f https://data.dgl.ai/dataset/benchmarking-gnns/ZINC.pkl -o ZINC.pkl -J -L -k
fi

cd ..


############
# PATTERN and CLUSTER
############

mkdir -p SBMs
cd SBMs || exit 1

FILE=SBM_CLUSTER.pkl
if test -f "$FILE"; then
    echo -e "$FILE already downloaded."
else
    echo -e "\ndownloading $FILE..."
    curl -f https://data.dgl.ai/dataset/benchmarking-gnns/SBM_CLUSTER.pkl -o SBM_CLUSTER.pkl -J -L -k
fi

FILE=SBM_PATTERN.pkl
if test -f "$FILE"; then
    echo -e "$FILE already downloaded."
else
    echo -e "\ndownloading $FILE..."
    curl -f https://data.dgl.ai/dataset/benchmarking-gnns/SBM_PATTERN.pkl -o SBM_PATTERN.pkl -J -L -k
fi


cd ..


================================================
FILE: data/script_download_molecules.sh
================================================


# Command to download dataset:
#   bash script_download_molecules.sh


# -p: re-running the script must not fail because the directory already exists.
mkdir -p molecules/
# Abort instead of downloading into the wrong directory if cd fails.
cd molecules || exit 1


FILE=ZINC.pkl
if test -f "$FILE"; then
    echo -e "$FILE already downloaded."
else
    echo -e "\ndownloading $FILE..."
    # -f: on an HTTP error, fail instead of saving the error page as the .pkl
    #     (which a later run would treat as "already downloaded").
    # NOTE(security): -k disables TLS certificate verification; kept as in the
    # original invocation, drop it if the host certificate validates for you.
    curl -f https://data.dgl.ai/dataset/benchmarking-gnns/ZINC.pkl -o ZINC.pkl -J -L -k
fi


================================================
FILE: layers/graph_transformer_layer.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import dgl.function as fn
import numpy as np

"""
    Graph Transformer Layer
    
"""

"""
    Util functions
"""
def src_dot_dst(src_field, dst_field, out_field):
    """
        Build an edge function returning, under ``out_field``, the element-wise
        product of the source-node ``src_field`` and the destination-node
        ``dst_field`` (no reduction is done here).
    """
    def edge_udf(edges):
        from_src = edges.src[src_field]
        from_dst = edges.dst[dst_field]
        return {out_field: from_src * from_dst}
    return edge_udf


def scaling(field, scale_constant):
    """
        Build an edge function that returns the edge data ``field`` divided
        by ``scale_constant``, stored under the same key.
    """
    def edge_udf(edges):
        unscaled = edges.data[field]
        return {field: unscaled / scale_constant}
    return edge_udf

# Improving implicit attention scores with explicit edge features, if available
def imp_exp_attn(implicit_attn, explicit_edge):
    """
        Build an edge function that multiplies the implicit attention scores
        by the explicit edge features, element-wise.

        implicit_attn: edge-data key holding the output of K Q
        explicit_edge: edge-data key holding the explicit edge features

        The product is returned under the ``implicit_attn`` key.
    """
    def edge_udf(edges):
        scores = edges.data[implicit_attn]
        edge_feats = edges.data[explicit_edge]
        return {implicit_attn: scores * edge_feats}
    return edge_udf


def exp_real(field, L):
    """
        Build an edge function producing ``'score_soft'``: the edge data
        ``field`` summed over its last dimension (kept), clamped to [-5, 5],
        exponentiated and divided by ``L + 1``.
    """
    def edge_udf(edges):
        summed = edges.data[field].sum(-1, keepdim=True)
        # clamp for softmax numerical stability
        bounded = summed.clamp(-5, 5)
        return {'score_soft': torch.exp(bounded) / (L + 1)}
    return edge_udf


def exp_fake(field, L):
    """
        Build an edge function producing ``'score_soft'``: the edge data
        ``field`` summed over its last dimension (kept), clamped to [-5, 5],
        exponentiated, multiplied by ``L`` and divided by ``L + 1``.
    """
    def edge_udf(edges):
        summed = edges.data[field].sum(-1, keepdim=True)
        # clamp for softmax numerical stability
        bounded = summed.clamp(-5, 5)
        return {'score_soft': L * torch.exp(bounded) / (L + 1)}
    return edge_udf

def exp(field):
    """
        Build an edge function producing ``'score_soft'``: the edge data
        ``field`` summed over its last dimension (kept), clamped to [-5, 5]
        and exponentiated.
    """
    def edge_udf(edges):
        summed = edges.data[field].sum(-1, keepdim=True)
        # clamp for softmax numerical stability
        bounded = summed.clamp(-5, 5)
        return {'score_soft': torch.exp(bounded)}
    return edge_udf


"""
    Single Attention Head
"""

class MultiHeadAttentionLayer(nn.Module):
    """
        Multi-head attention over the edges of a DGL graph.

        Node features are projected to queries/keys/values and edge features
        to a per-edge modulation ``E``; all projections take ``in_dim`` inputs
        and produce ``num_heads * out_dim`` outputs.

        When ``full_graph`` is true, a second set of projections
        (``Q_2``, ``K_2``, ``E_2``) is used for edges whose ``g.edata['real']``
        is 0, and the exponentiated scores are weighted by ``1 / (gamma + 1)``
        on real edges and ``gamma / (gamma + 1)`` on the others.
    """
    def __init__(self, gamma, in_dim, out_dim, num_heads, full_graph, use_bias):
        super().__init__()

        self.out_dim = out_dim
        self.num_heads = num_heads
        self.gamma = gamma
        self.full_graph = full_graph

        bias = bool(use_bias)
        proj_dim = out_dim * num_heads

        # Creation order (Q, K, E, [Q_2, K_2, E_2], V) is kept as in the
        # two former use_bias branches so seeded initialisation is unchanged.
        self.Q = nn.Linear(in_dim, proj_dim, bias=bias)
        self.K = nn.Linear(in_dim, proj_dim, bias=bias)
        self.E = nn.Linear(in_dim, proj_dim, bias=bias)

        if self.full_graph:
            self.Q_2 = nn.Linear(in_dim, proj_dim, bias=bias)
            self.K_2 = nn.Linear(in_dim, proj_dim, bias=bias)
            self.E_2 = nn.Linear(in_dim, proj_dim, bias=bias)

        self.V = nn.Linear(in_dim, proj_dim, bias=bias)

    def propagate_attention(self, g):
        """
            Compute edge scores and aggregate them on destination nodes.

            Reads the projections that ``forward`` stored on ``g`` and writes
            ``g.ndata['wV']`` (sum of score-weighted values) and
            ``g.ndata['z']`` (sum of scores).
        """
        if self.full_graph:
            # squeeze(1) rather than squeeze(): a bare squeeze() turns a
            # single matching edge into a 0-d tensor instead of a length-1
            # index vector.
            real_ids = torch.nonzero(g.edata['real']).squeeze(1)
            fake_ids = torch.nonzero(g.edata['real'] == 0).squeeze(1)
        else:
            real_ids = g.edges(form='eid')

        g.apply_edges(src_dot_dst('K_h', 'Q_h', 'score'), edges=real_ids)

        if self.full_graph:
            g.apply_edges(src_dot_dst('K_2h', 'Q_2h', 'score'), edges=fake_ids)

        # scale scores by sqrt(d)
        g.apply_edges(scaling('score', np.sqrt(self.out_dim)))

        # Use available edge features to modify the scores for edges
        g.apply_edges(imp_exp_attn('score', 'E'), edges=real_ids)

        if self.full_graph:
            g.apply_edges(imp_exp_attn('score', 'E_2'), edges=fake_ids)

        if self.full_graph:
            # softmax and scaling by gamma
            L = self.gamma
            g.apply_edges(exp_real('score', L), edges=real_ids)
            g.apply_edges(exp_fake('score', L), edges=fake_ids)
        else:
            g.apply_edges(exp('score'), edges=real_ids)

        # Send weighted values to target nodes.
        # u_mul_e / copy_e are the current names of the deprecated
        # src_mul_edge / copy_edge built-ins.
        eids = g.edges()
        g.send_and_recv(eids, fn.u_mul_e('V_h', 'score_soft', 'V_h'), fn.sum('V_h', 'wV'))
        g.send_and_recv(eids, fn.copy_e('score_soft', 'score_soft'), fn.sum('score_soft', 'z'))

    def forward(self, g, h, e):
        """
            Run attention on graph ``g`` with node features ``h`` and edge
            features ``e``.

            Returns the normalised attention output ``wV / (z + 1e-6)``, with
            one ``out_dim``-sized vector per node and head. Intermediate
            projections are stored on ``g.ndata`` / ``g.edata`` as a side
            effect.
        """
        Q_h = self.Q(h)
        K_h = self.K(h)
        E = self.E(e)

        if self.full_graph:
            Q_2h = self.Q_2(h)
            K_2h = self.K_2(h)
            E_2 = self.E_2(e)

        V_h = self.V(h)

        # Reshaping into [num_nodes, num_heads, feat_dim] to
        # get projections for multi-head attention
        g.ndata['Q_h'] = Q_h.view(-1, self.num_heads, self.out_dim)
        g.ndata['K_h'] = K_h.view(-1, self.num_heads, self.out_dim)
        g.edata['E'] = E.view(-1, self.num_heads, self.out_dim)

        if self.full_graph:
            g.ndata['Q_2h'] = Q_2h.view(-1, self.num_heads, self.out_dim)
            g.ndata['K_2h'] = K_2h.view(-1, self.num_heads, self.out_dim)
            g.edata['E_2'] = E_2.view(-1, self.num_heads, self.out_dim)

        g.ndata['V_h'] = V_h.view(-1, self.num_heads, self.out_dim)

        self.propagate_attention(g)

        # Small epsilon guards against division by zero for nodes whose
        # accumulated score is 0.
        h_out = g.ndata['wV'] / (g.ndata['z'] + 1e-6)

        return h_out
    

class GraphTransformerLayer(nn.Module):
    """
        Transformer block on node features: multi-head graph attention with
        an output projection, then a two-layer feed-forward network. Each
        stage is followed by an optional residual connection, LayerNorm and
        BatchNorm (in that order).

        Edge features are handed to the attention module and returned
        unchanged by ``forward``.
    """
    def __init__(self, gamma, in_dim, out_dim, num_heads, full_graph, dropout=0.0, layer_norm=False, batch_norm=True, residual=True, use_bias=False):
        super().__init__()

        self.in_channels = in_dim
        self.out_channels = out_dim
        self.num_heads = num_heads
        self.dropout = dropout
        self.residual = residual
        self.layer_norm = layer_norm
        self.batch_norm = batch_norm

        # Each head gets an equal (floor-divided) share of the output width.
        per_head_dim = out_dim // num_heads
        self.attention = MultiHeadAttentionLayer(gamma, in_dim, per_head_dim, num_heads, full_graph, use_bias)

        self.O_h = nn.Linear(out_dim, out_dim)

        # Normalisation after the attention stage
        if self.layer_norm:
            self.layer_norm1_h = nn.LayerNorm(out_dim)
        if self.batch_norm:
            self.batch_norm1_h = nn.BatchNorm1d(out_dim)

        # FFN for h
        hidden_dim = out_dim * 2
        self.FFN_h_layer1 = nn.Linear(out_dim, hidden_dim)
        self.FFN_h_layer2 = nn.Linear(hidden_dim, out_dim)

        # Normalisation after the feed-forward stage
        if self.layer_norm:
            self.layer_norm2_h = nn.LayerNorm(out_dim)
        if self.batch_norm:
            self.batch_norm2_h = nn.BatchNorm1d(out_dim)

    def _skip_and_normalize(self, h, skip, stage):
        """Apply the optional residual add and the stage's LayerNorm/BatchNorm (stage is 1 or 2)."""
        if self.residual:
            h = skip + h  # residual connection

        if self.layer_norm:
            layer_norm = self.layer_norm1_h if stage == 1 else self.layer_norm2_h
            h = layer_norm(h)

        if self.batch_norm:
            batch_norm = self.batch_norm1_h if stage == 1 else self.batch_norm2_h
            h = batch_norm(h)

        return h

    def forward(self, g, h, e):
        """Return the updated node features together with the untouched edge features."""
        attn_input = h  # for first residual connection

        # multi-head attention out, heads concatenated along the feature axis
        heads = self.attention(g, h, e)
        h = heads.view(-1, self.out_channels)

        h = F.dropout(h, self.dropout, training=self.training)
        h = self.O_h(h)
        h = self._skip_and_normalize(h, attn_input, 1)

        ffn_input = h  # for second residual connection

        # FFN for h
        h = F.relu(self.FFN_h_layer1(h))
        h = F.dropout(h, self.dropout, training=self.training)
        h = self.FFN_h_layer2(h)
        h = self._skip_and_normalize(h, ffn_input, 2)

        return h, e

    def __repr__(self):
        template = '{}(in_channels={}, out_channels={}, heads={}, residual={})'
        return template.format(self.__class__.__name__,
                               self.in_channels,
                               self.out_channels, self.num_heads, self.residual)

================================================
FILE: layers/mlp_readout_layer.py
================================================
import torch.nn as nn
import torch.nn.functional as F

"""
    MLP Layer used after graph vector representation
"""


class MLPReadout(nn.Module):
    """
        Readout MLP: ``L`` hidden linear layers with ReLU, whose widths are
        ``input_dim // 2**l`` for l = 0..L, followed by a final linear layer
        mapping to ``output_dim`` (no activation on the output).
    """

    def __init__(self, input_dim, output_dim, L=2):  # L=nb_hidden_layers
        super().__init__()
        widths = [input_dim // 2 ** depth for depth in range(L + 1)]

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out, bias=True))
        stack.append(nn.Linear(widths[-1], output_dim, bias=True))

        self.FC_layers = nn.ModuleList(stack)
        self.L = L

    def forward(self, x):
        out = x
        # Hidden layers are each followed by a ReLU ...
        for hidden in self.FC_layers[:self.L]:
            out = F.relu(hidden(out))
        # ... the output layer is purely linear.
        return self.FC_layers[self.L](out)


================================================
FILE: main_SBMs_node_classification.py
================================================
"""
    IMPORTING LIBS
"""
import dgl

import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils.data import DataLoader

from tensorboardX import SummaryWriter
from tqdm import tqdm

class DotDict(dict):
    """Dictionary whose items are also reachable as attributes (``d.key`` is ``d['key']``)."""

    def __init__(self, **kwds):
        super().__init__(**kwds)
        # Using the dict itself as the instance namespace makes attribute
        # access and item access share the same storage.
        self.__dict__ = self

"""
    IMPORTING CUSTOM MODULES/METHODS
"""
from nets.SBMs_node_classification.load_net import gnn_model 
from data.data import LoadData 


"""
    GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """
        Restrict CUDA to ``gpu_id`` via environment variables and pick the device.

        Sets ``CUDA_DEVICE_ORDER`` and ``CUDA_VISIBLE_DEVICES`` unconditionally.
        Returns ``torch.device("cuda")`` when CUDA is available and ``use_gpu``
        is truthy, otherwise ``torch.device("cpu")``.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    cuda_available = torch.cuda.is_available()
    if cuda_available and use_gpu:
        print('cuda available with GPU:', torch.cuda.get_device_name(0))
        return torch.device("cpu") if False else torch.device("cuda")

    # Report the actual reason for falling back to CPU: the old message
    # claimed CUDA was unavailable even when it was merely not requested.
    if cuda_available:
        print('cuda available but not requested, using CPU')
    else:
        print('cuda not available')
    return torch.device("cpu")


"""
    VIEWING ENCODING TYPE AND NUM PARAMS
"""
def view_model_param(LPE, net_params):
    """
        Build a throw-away model with ``gnn_model(LPE, net_params)``, count
        its parameters, print the encoding type with the count, and return
        the count.
    """
    net = gnn_model(LPE, net_params)
    total_param = 0
    print("MODEL DETAILS:\n")
    for tensor in net.parameters():
        shape = list(tensor.data.size())
        total_param = total_param + np.prod(shape)

    # Human-readable label for the positional-encoding mode
    if LPE == 'edge':
        encoding = 'Edge Laplace Encoding/'
    elif LPE == 'node':
        encoding = 'Node Laplace Encoding'
    else:
        encoding = 'None'

    print('Encoding Type/Total parameters:', encoding, total_param)
    return total_param


"""
    TRAINING CODE
"""

def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
    """
        Preprocess the dataset, train the model and write config/result files.

        MODEL_NAME: name written to the config and result files.
        dataset: object exposing ``name``, ``train``/``val``/``test``,
            ``collate`` and the ``_laplace_decomp`` / ``_make_full_graph`` /
            ``_add_edge_laplace_feats`` preprocessing methods.
        params: optimisation settings (seed, epochs, batch_size, init_lr,
            lr_reduce_factor, lr_schedule_patience, min_lr, weight_decay,
            max_time).
        net_params: model settings; ``'total_param'`` is added to it here.
        dirs: ``(root_log_dir, root_ckpt_dir, write_file_name, write_config_file)``.

        Training stops early on Ctrl+C, when the learning rate drops below
        ``params['min_lr']`` or after ``params['max_time']`` hours. The
        reported test/train accuracies are those of the epoch with the best
        validation accuracy. If no epoch completes, nothing is reported and
        no result file is written.
    """
    start0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    if net_params['LPE'] in ['edge', 'node']:
        st = time.time()
        print("[!] Computing Laplace Decompositions..")
        dataset._laplace_decomp(net_params['m'])
        print('Time taken to decompose Laplacians: ',time.time()-st)

    if net_params['full_graph']:
        st = time.time()
        print("[!] Adding full graph connectivity..")
        dataset._make_full_graph()
        print('Time taken to add full graph connectivity: ',time.time()-st)

    if net_params['LPE'] == 'edge':
        st = time.time()
        print("[!] Computing edge Laplace features..")
        dataset._add_edge_laplace_feats()
        print('Time taken to compute edge Laplace features: ',time.time()-st)

    trainset, valset, testset = dataset.train, dataset.val, dataset.test

    net_params['total_param'] = view_model_param(net_params['LPE'], net_params)

    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs
    device = net_params['device']

    # Write network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n"""                .format(DATASET_NAME, MODEL_NAME, params, net_params, net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))
    print("Number of Classes: ", net_params['n_classes'])

    model = gnn_model(net_params['LPE'], net_params)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=params['init_lr'], weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                     factor=params['lr_reduce_factor'],
                                                     patience=params['lr_schedule_patience'],
                                                     verbose=True)

    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_accs, epoch_val_accs, epoch_test_accs = [], [], []

    # import train and evaluate functions
    from train.train_SBMs_node_classification import train_epoch, evaluate_network

    train_loader = DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(valset, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)
    test_loader = DataLoader(testset, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs'])) as t:
            for epoch in t:

                start = time.time()

                epoch_train_loss, epoch_train_acc, optimizer = train_epoch(model, optimizer, device, train_loader, epoch, net_params['LPE'])

                epoch_val_loss, epoch_val_acc = evaluate_network(model, device, val_loader, epoch, net_params['LPE'])
                _, epoch_test_acc = evaluate_network(model, device, test_loader, epoch, net_params['LPE'])

                epoch_train_losses.append(epoch_train_loss)
                epoch_val_losses.append(epoch_val_loss)
                epoch_train_accs.append(epoch_train_acc)
                epoch_val_accs.append(epoch_val_acc)

                epoch_test_accs.append(epoch_test_acc)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_acc', epoch_train_acc, epoch)
                writer.add_scalar('val/_acc', epoch_val_acc, epoch)
                writer.add_scalar('test/_acc', epoch_test_acc, epoch)
                writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], epoch)

                t.set_postfix(time=time.time()-start, lr=optimizer.param_groups[0]['lr'],
                              train_loss=epoch_train_loss, val_loss=epoch_val_loss,
                              train_acc=epoch_train_acc, val_acc=epoch_val_acc,
                              test_acc=epoch_test_acc)

                per_epoch_time.append(time.time()-start)

                # Saving checkpoint
                ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")
                # exist_ok avoids the check-then-create race of os.path.exists + makedirs
                os.makedirs(ckpt_dir, exist_ok=True)
                torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Keep only the checkpoints of the two most recent epochs
                files = glob.glob(ckpt_dir + '/*.pkl')
                for file in files:
                    epoch_nb = file.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch-1:
                        os.remove(file)

                scheduler.step(epoch_val_loss)

                if optimizer.param_groups[0]['lr'] < params['min_lr']:
                    print("\n!! LR SMALLER OR EQUAL TO MIN LR THRESHOLD.")
                    break

                # Stop training after params['max_time'] hours
                if time.time()-start0 > params['max_time']*3600:
                    print('-' * 89)
                    print("Max_time for training elapsed {:.2f} hours, so stopping".format(params['max_time']))
                    break

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early because of KeyboardInterrupt')

    # Without at least one finished epoch there is no metric to report:
    # max() on the empty list would raise and `epoch` may be unbound.
    if not epoch_val_accs:
        print('No epoch completed; skipping result reporting.')
        writer.close()
        return

    #Return test and train metrics at best val metric
    index = epoch_val_accs.index(max(epoch_val_accs))

    test_acc = epoch_test_accs[index]
    train_acc = epoch_train_accs[index]

    print("Test Accuracy: {:.4f}".format(test_acc))
    print("Train Accuracy: {:.4f}".format(train_acc))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time()-start0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))

    writer.close()

    # Write the results in out_dir/results folder
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
    FINAL RESULTS\nTEST ACCURACY: {:.4f}\nTRAIN ACCURACY: {:.4f}\n\n
    Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_acc, train_acc, epoch, (time.time()-start0)/3600, np.mean(per_epoch_time)))

        
def main():
    """
        USER CONTROLS

        Read the JSON config given by ``--config``, apply any command-line
        overrides on top of it, load the dataset, derive the input dimension
        and number of classes from the first training sample, prepare the
        output locations and run ``train_val_pipeline``.

        All option values arrive as strings; boolean options are true only
        for the exact string 'True'.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    parser.add_argument('--out_dir', help="Please give a value for out_dir")
    parser.add_argument('--seed', help="Please give a value for seed")
    parser.add_argument('--epochs', help="Please give a value for epochs")
    parser.add_argument('--batch_size', help="Please give a value for batch_size")
    parser.add_argument('--init_lr', help="Please give a value for init_lr")
    parser.add_argument('--lr_reduce_factor', help="Please give a value for lr_reduce_factor")
    parser.add_argument('--lr_schedule_patience', help="Please give a value for lr_schedule_patience")
    parser.add_argument('--min_lr', help="Please give a value for min_lr")
    parser.add_argument('--weight_decay', help="Please give a value for weight_decay")
    parser.add_argument('--print_epoch_interval', help="Please give a value for print_epoch_interval")
    parser.add_argument('--max_time', help="Please give a value for max_time")

    #Model details
    parser.add_argument('--full_graph', help="Please give a value for full_graph")
    parser.add_argument('--gamma', help="Please give a value for gamma")
    parser.add_argument('--m', help="Please give a value for m")

    parser.add_argument('--LPE', help="Please give a value for LPE")
    parser.add_argument('--LPE_layers', help="Please give a value for LPE_layers")
    parser.add_argument('--LPE_dim', help="Please give a value for LPE_dim")
    parser.add_argument('--LPE_n_heads', help="Please give a value for LPE_n_heads")

    parser.add_argument('--GT_layers', help="Please give a value for GT_layers")
    parser.add_argument('--GT_hidden_dim', help="Please give a value for GT_hidden_dim")
    parser.add_argument('--GT_out_dim', help="Please give a value for GT_out_dim")
    parser.add_argument('--GT_n_heads', help="Please give a value for GT_n_heads")

    parser.add_argument('--residual', help="Please give a value for residual")
    parser.add_argument('--readout', help="Please give a value for readout")
    parser.add_argument('--in_feat_dropout', help="Please give a value for in_feat_dropout")
    parser.add_argument('--dropout', help="Please give a value for dropout")
    parser.add_argument('--layer_norm', help="Please give a value for layer_norm")
    parser.add_argument('--batch_norm', help="Please give a value for batch_norm")
    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)

    def parse_bool(text):
        # Only the exact string 'True' enables a flag.
        return text == 'True'

    def apply_overrides(target, casts):
        # Copy every option given on the command line into `target`,
        # converted with its cast; options left unset keep the config value.
        for key, cast in casts:
            raw = getattr(args, key)
            if raw is not None:
                target[key] = cast(raw)

    # device
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # model, dataset, out_dir
    MODEL_NAME = args.model if args.model is not None else config['model']
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = LoadData(DATASET_NAME)
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    apply_overrides(params, (
        ('seed', int),
        ('epochs', int),
        ('batch_size', int),
        ('init_lr', float),
        ('lr_reduce_factor', float),
        ('lr_schedule_patience', int),
        ('min_lr', float),
        ('weight_decay', float),
        ('print_epoch_interval', int),
        ('max_time', float),
    ))

    # model parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']

    apply_overrides(net_params, (
        ('full_graph', parse_bool),
        ('gamma', float),
        ('m', int),
        ('LPE', str),
    ))

    if net_params['LPE'] not in ['node', 'edge', 'none']:
        print('[!] User did not provide a valid input argument for \'LPE\'. Valid inputs are \'node\', \'edge\', and \'none\'.')
        # Non-zero status so callers can detect the invalid configuration.
        raise SystemExit(1)

    apply_overrides(net_params, (
        ('LPE_layers', int),
        ('LPE_dim', int),
        ('LPE_n_heads', int),
        ('GT_layers', int),
        ('GT_hidden_dim', int),
        ('GT_out_dim', int),
        ('GT_n_heads', int),
        ('residual', parse_bool),
        ('readout', str),
        ('in_feat_dropout', float),
        ('dropout', float),
        ('layer_norm', parse_bool),
        ('batch_norm', parse_bool),
    ))

    # SBM
    # Both sizes are inferred from the first training sample only.
    net_params['in_dim'] = torch.unique(dataset.train[0][0].ndata['feat'],dim=0).size(0) # node_dim (feat is an integer)

    net_params['n_classes'] = torch.unique(dataset.train[0][1],dim=0).size(0)

    # One timestamp for all four paths, so they cannot disagree when the
    # clock ticks over between calls.
    run_tag = MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs)

    
# Run only when executed as a script, so importing this module (for its
# helpers) does not parse arguments and start a training run.
if __name__ == "__main__":
    main()


================================================
FILE: main_ZINC_graph_regression.py
================================================
"""
    IMPORTING LIBS
"""
import dgl

import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils.data import DataLoader

from tensorboardX import SummaryWriter
from tqdm import tqdm


"""
    IMPORTING CUSTOM MODULES/METHODS
"""
from nets.ZINC_graph_regression.load_net import gnn_model 
from data.data import LoadData 


"""
    GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """
        Restrict CUDA to ``gpu_id`` via environment variables and pick the device.

        Sets ``CUDA_DEVICE_ORDER`` and ``CUDA_VISIBLE_DEVICES`` unconditionally.
        Returns ``torch.device("cuda")`` when CUDA is available and ``use_gpu``
        is truthy, otherwise ``torch.device("cpu")``.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id)

    has_cuda = torch.cuda.is_available()
    if has_cuda and use_gpu:
        print('cuda available with GPU:', torch.cuda.get_device_name(0))
        device = torch.device("cuda")
    else:
        # Say why the CPU is used: previously 'cuda not available' was
        # printed even when CUDA existed but was not requested.
        if has_cuda:
            print('cuda available but not requested, using CPU')
        else:
            print('cuda not available')
        device = torch.device("cpu")
    return device


"""
    VIEWING ENCODING TYPE AND NUM PARAMS
"""
def view_model_param(LPE, net_params):
    """Build the network once, count its parameters, and print a summary.

    Args:
        LPE: Encoding selector; 'edge' and 'node' get a dedicated label, any
            other value is reported as 'None'.
        net_params: Network hyper-parameters forwarded to ``gnn_model``.

    Returns:
        The accumulated number of scalar parameters of the model.
    """
    network = gnn_model(LPE, net_params)
    print("MODEL DETAILS:\n")

    total_param = 0
    for weights in network.parameters():
        # Number of scalars in this tensor = product of its dimensions.
        total_param += np.prod(list(weights.data.size()))

    if LPE == 'edge':
        encoding_label = 'Edge Laplace Encoding/'
    elif LPE == 'node':
        encoding_label = 'Node Laplace Encoding'
    else:
        encoding_label = 'None'
    print('Encoding Type/Total parameters:', encoding_label, total_param)

    return total_param


"""
    TRAINING CODE
"""

def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
    """Train a model and report train/test MAE at the best validation MAE.

    The dataset is first preprocessed as requested by ``net_params`` (Laplace
    decomposition, full-graph connectivity, edge Laplace features). Training
    then runs with Adam and a ReduceLROnPlateau scheduler until one of the
    following happens: ``params['epochs']`` epochs are done, the learning rate
    falls below ``params['min_lr']``, ``params['max_time']`` hours have
    elapsed, or the user presses Ctrl+C.

    Side effects: writes a config text file, TensorBoard logs, per-epoch
    checkpoints (only the most recent ones are kept) and a results text file.

    Args:
        MODEL_NAME: Model name, used only in the written text files.
        dataset: Dataset object; this function uses its ``name``, ``train``,
            ``val``, ``test`` and ``collate`` attributes and the preprocessing
            methods called below.
        params: Optimisation settings (keys read: seed, init_lr, weight_decay,
            lr_reduce_factor, lr_schedule_patience, batch_size, epochs,
            min_lr, max_time).
        net_params: Network settings; ``'total_param'`` is added in place.
        dirs: Tuple ``(root_log_dir, root_ckpt_dir, write_file_name,
            write_config_file)``.

    Returns:
        None. If no epoch completes (immediate interrupt or zero epochs) a
        notice is printed and no results file is written.
    """
    start0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    if net_params['LPE'] in ['edge', 'node']:
        st = time.time()
        print("[!] Computing Laplace Decompositions..")
        dataset._laplace_decomp(net_params['m'])
        print('Time taken to decompose Laplacians: ',time.time()-st)

    if net_params['full_graph']:
        st = time.time()
        print("[!] Adding full graph connectivity..")
        dataset._make_full_graph()
        print('Time taken to add full graph connectivity: ',time.time()-st)

    if net_params['LPE'] == 'edge':
        st = time.time()
        print("[!] Computing edge Laplace features..")
        dataset._add_edge_laplace_feats()
        print('Time taken to compute edge Laplace features: ',time.time()-st)

    trainset, valset, testset = dataset.train, dataset.val, dataset.test

    net_params['total_param'] = view_model_param(net_params['LPE'], net_params)

    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs
    device = net_params['device']

    # Write the network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n"""
                .format(DATASET_NAME, MODEL_NAME, params, net_params, net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))

    model = gnn_model(net_params['LPE'], net_params)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=params['init_lr'], weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                     factor=params['lr_reduce_factor'],
                                                     patience=params['lr_schedule_patience'],
                                                     verbose=True)

    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_MAEs, epoch_val_MAEs, epoch_test_MAEs = [], [], []

    # import train and evaluate functions
    from train.train_ZINC_graph_regression import train_epoch, evaluate_network

    train_loader = DataLoader(trainset, num_workers=4, batch_size=params['batch_size'], shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(valset, num_workers=4, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)
    test_loader = DataLoader(testset, num_workers=4, batch_size=params['batch_size'], shuffle=False, collate_fn=dataset.collate)

    # The checkpoint folder does not change between epochs, so build its path once.
    ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs'])) as t:
            for epoch in t:

                t.set_description('Epoch %d' % epoch)

                start = time.time()

                epoch_train_loss, epoch_train_mae, optimizer = train_epoch(model, optimizer, device, train_loader, epoch, net_params['LPE'])

                epoch_val_loss, epoch_val_mae = evaluate_network(model, device, val_loader, epoch, net_params['LPE'])
                _, epoch_test_mae = evaluate_network(model, device, test_loader, epoch, net_params['LPE'])

                epoch_train_losses.append(epoch_train_loss)
                epoch_val_losses.append(epoch_val_loss)
                epoch_train_MAEs.append(epoch_train_mae)
                epoch_val_MAEs.append(epoch_val_mae)
                epoch_test_MAEs.append(epoch_test_mae)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_mae', epoch_train_mae, epoch)
                writer.add_scalar('val/_mae', epoch_val_mae, epoch)
                writer.add_scalar('test/_mae', epoch_test_mae, epoch)
                writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], epoch)

                t.set_postfix(time=time.time()-start, lr=optimizer.param_groups[0]['lr'],
                              train_loss=epoch_train_loss, val_loss=epoch_val_loss,
                              train_MAE=epoch_train_mae, val_MAE=epoch_val_mae,
                              test_MAE=epoch_test_mae)

                per_epoch_time.append(time.time()-start)

                # Saving checkpoint
                os.makedirs(ckpt_dir, exist_ok=True)
                torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Keep only the checkpoints of the current and previous epoch;
                # the epoch number is the trailing "_<n>.pkl" of the file name.
                for ckpt_path in glob.glob(ckpt_dir + '/*.pkl'):
                    epoch_nb = ckpt_path.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch-1:
                        os.remove(ckpt_path)

                scheduler.step(epoch_val_loss)

                if optimizer.param_groups[0]['lr'] < params['min_lr']:
                    print("\n!! LR EQUAL TO MIN LR SET.")
                    break

                # Stop training after params['max_time'] hours
                if time.time()-start0 > params['max_time']*3600:
                    print('-' * 89)
                    print("Max_time for training elapsed {:.2f} hours, so stopping".format(params['max_time']))
                    break

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early because of KeyboardInterrupt')

    if not epoch_val_MAEs:
        # Interrupted during the first epoch (or zero epochs requested): there
        # is no best epoch to pick and `epoch` may not even be bound, so the
        # summary below would crash. Release the writer and stop here.
        print("No epoch was completed, so there are no results to report.")
        writer.close()
        return

    #Return test and train metrics at best val metric
    index = epoch_val_MAEs.index(min(epoch_val_MAEs))

    test_mae = epoch_test_MAEs[index]
    train_mae = epoch_train_MAEs[index]

    print("Test MAE: {:.4f}".format(test_mae))
    print("Train MAE: {:.4f}".format(train_mae))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time()-start0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))

    writer.close()

    # Write the results in out_dir/results folder
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
    FINAL RESULTS\nTEST MAE: {:.4f}\nTRAIN MAE: {:.4f}\n\n
    Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_mae, train_mae, epoch, (time.time()-start0)/3600, np.mean(per_epoch_time)))
        

def main():
    """Read the JSON config, apply command-line overrides, and start training.

    Every option except ``--config`` is optional; a supplied option replaces
    the matching entry of the config file. Boolean options are true only for
    the exact string ``'True'``. All output paths of a run share one timestamp
    so that its log, checkpoint, result and config names always match.

    Raises:
        SystemExit: with status 1 if the resulting ``LPE`` setting is not one
            of 'node', 'edge' or 'none' (argparse also exits on bad usage).
    """
    parser = argparse.ArgumentParser()
    # Without a config file nothing can be loaded, so let argparse report it.
    parser.add_argument('--config', required=True, help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    for option in (
        'out_dir', 'seed', 'epochs', 'batch_size', 'init_lr', 'lr_reduce_factor',
        'lr_schedule_patience', 'min_lr', 'weight_decay', 'print_epoch_interval',
        'max_time',
        # Model details
        'full_graph', 'gamma', 'm',
        'LPE', 'LPE_layers', 'LPE_dim', 'LPE_n_heads',
        'GT_layers', 'GT_hidden_dim', 'GT_out_dim', 'GT_n_heads',
        'residual', 'readout', 'in_feat_dropout', 'dropout', 'layer_norm',
        'batch_norm',
    ):
        parser.add_argument('--' + option, help="Please give a value for " + option)
    args = parser.parse_args()

    def as_bool(text):
        # Only the literal string 'True' enables a flag.
        return text == 'True'

    def apply_overrides(target, casts):
        # Copy each option that was given on the command line into `target`,
        # converting it from its string form with the paired cast.
        for key, cast in casts:
            raw = getattr(args, key)
            if raw is not None:
                target[key] = cast(raw)

    with open(args.config) as f:
        config = json.load(f)

    # device
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # model, dataset, out_dir
    MODEL_NAME = args.model if args.model is not None else config['model']
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = LoadData(DATASET_NAME)
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    apply_overrides(params, (
        ('seed', int), ('epochs', int), ('batch_size', int),
        ('init_lr', float), ('lr_reduce_factor', float),
        ('lr_schedule_patience', int), ('min_lr', float),
        ('weight_decay', float), ('print_epoch_interval', int),
        ('max_time', float),
    ))

    # model parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']

    apply_overrides(net_params, (
        ('full_graph', as_bool), ('gamma', float), ('m', int), ('LPE', str),
    ))

    if net_params['LPE'] not in ['node', 'edge', 'none']:
        print('[!] User did not provide a valid input argument for \'LPE\'. Valid inputs are \'node\', \'edge\', and \'none\'.')
        # Non-zero status so callers can tell the run was rejected.
        raise SystemExit(1)

    apply_overrides(net_params, (
        ('LPE_layers', int), ('LPE_dim', int), ('LPE_n_heads', int),
        ('GT_layers', int), ('GT_hidden_dim', int), ('GT_out_dim', int),
        ('GT_n_heads', int),
        ('residual', as_bool), ('readout', str),
        ('in_feat_dropout', float), ('dropout', float),
        ('layer_norm', as_bool), ('batch_norm', as_bool),
    ))

    # ZINC
    net_params['num_atom_type'] = dataset.num_atom_type
    net_params['num_bond_type'] = dataset.num_bond_type

    #If using full graph, need to add a possible edge type (fake edge)
    if net_params['full_graph']:
        net_params['num_bond_type']+=1

    # Take the timestamp once: formatting it separately per path could yield
    # different names for the same run if the clock ticks in between.
    run_name = "{}_{}_GPU{}_{}".format(MODEL_NAME, DATASET_NAME, config['gpu']['id'],
                                       time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y'))
    # os.path.join works whether or not out_dir ends with a path separator.
    root_log_dir = os.path.join(out_dir, 'logs', run_name)
    root_ckpt_dir = os.path.join(out_dir, 'checkpoints', run_name)
    write_file_name = os.path.join(out_dir, 'results', 'result_' + run_name)
    write_config_file = os.path.join(out_dir, 'configs', 'config_' + run_name)
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    os.makedirs(os.path.join(out_dir, 'results'), exist_ok=True)
    os.makedirs(os.path.join(out_dir, 'configs'), exist_ok=True)

    train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs)

    
# Launch training only when this file is executed as a script. The training
# pipeline builds DataLoaders with worker processes (num_workers=4); when those
# workers are started by re-importing this module, an unguarded call would
# start training again inside every worker.
if __name__ == "__main__":
    main()


================================================
FILE: main_molhiv.py
================================================
"""
    IMPORTING LIBS
"""
import dgl

import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils.data import DataLoader

from tensorboardX import SummaryWriter
from tqdm import tqdm


"""
    IMPORTING CUSTOM MODULES/METHODS
"""
from nets.molhiv_graph_regression.load_net import gnn_model
from data.data import LoadData


"""
    GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """Select the compute device after exposing one GPU index to CUDA.

    Args:
        use_gpu: Truthy when a GPU should be used if one is available.
        gpu_id: Index stored as a string in ``CUDA_VISIBLE_DEVICES``.

    Returns:
        A ``torch.device``: "cuda" if CUDA is available and requested,
        "cpu" in every other case.
    """
    for variable, value in (("CUDA_DEVICE_ORDER", "PCI_BUS_ID"),
                            ("CUDA_VISIBLE_DEVICES", str(gpu_id))):
        os.environ[variable] = value

    device_name = "cpu"
    if torch.cuda.is_available() and use_gpu:
        print('cuda available with GPU:',torch.cuda.get_device_name(0))
        device_name = "cuda"
    else:
        print('cuda not available')

    return torch.device(device_name)


"""
    VIEWING ENCODING TYPE AND NUM PARAMS
"""
def view_model_param(LPE, net_params):
    """Report the encoding type and the parameter count of a fresh model.

    Args:
        LPE: Encoding selector ('edge', 'node', or anything else for none).
        net_params: Network hyper-parameters passed through to ``gnn_model``.

    Returns:
        Total number of scalar parameters, summed over all model parameters.
    """
    model = gnn_model(LPE, net_params)
    print("MODEL DETAILS:\n")

    # Size of each parameter tensor is the product of its dimensions.
    tensor_sizes = [np.prod(list(p.data.size())) for p in model.parameters()]
    total_param = 0
    for size in tensor_sizes:
        total_param += size

    label_by_encoding = {
        'edge': 'Edge Laplace Encoding/',
        'node': 'Node Laplace Encoding',
    }
    print('Encoding Type/Total parameters:', label_by_encoding.get(LPE, 'None'), total_param)
    return total_param


"""
    TRAINING CODE
"""

def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
    """Train a model and report train/test AUC at the best validation AUC.

    The dataset is first preprocessed as requested by ``net_params`` (Laplace
    decomposition, full-graph connectivity, edge Laplace features). Training
    then runs with Adam and a ReduceLROnPlateau scheduler until one of the
    following happens: ``params['epochs']`` epochs are done, the learning rate
    falls below ``params['min_lr']``, ``params['max_time']`` hours have
    elapsed, or the user presses Ctrl+C.

    Side effects: writes a config text file, TensorBoard logs, per-epoch
    checkpoints (only the most recent ones are kept) and a results text file.

    Args:
        MODEL_NAME: Model name, used only in the written text files.
        dataset: Dataset object; this function uses its ``name``, ``train``,
            ``val``, ``test`` and ``collate`` attributes and the preprocessing
            methods called below.
        params: Optimisation settings (keys read: seed, init_lr, weight_decay,
            lr_reduce_factor, lr_schedule_patience, batch_size, epochs,
            min_lr, max_time).
        net_params: Network settings; ``'total_param'`` is added in place.
        dirs: Tuple ``(root_log_dir, root_ckpt_dir, write_file_name,
            write_config_file)``.

    Returns:
        None. If no epoch completes (immediate interrupt or zero epochs) a
        notice is printed and no results file is written.
    """
    t0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    if net_params['LPE'] in ['edge', 'node']:
        st = time.time()
        print("[!] Computing Laplace Decompositions..")
        dataset._laplace_decomp(net_params['m'])
        print('Time LapPE:',time.time()-st)

    if net_params['full_graph']:
        st = time.time()
        print("[!] Adding full graph connectivity..")
        dataset._make_full_graph()
        print('Time taken to convert to full graphs:',time.time()-st)

    if net_params['LPE'] == 'edge':
        st = time.time()
        print("[!] Computing edge Laplace features..")
        dataset._add_edge_laplace_feats()
        print('Time taken to compute edge Laplace features: ',time.time()-st)

    net_params['total_param'] = view_model_param(net_params['LPE'], net_params)

    trainset, valset, testset = dataset.train, dataset.val, dataset.test

    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs
    device = net_params['device']

    # Write the network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n"""
                .format(DATASET_NAME, MODEL_NAME, params, net_params, net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))

    model = gnn_model(net_params['LPE'], net_params)
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=params['init_lr'], weight_decay=params['weight_decay'])
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                     factor=params['lr_reduce_factor'],
                                                     patience=params['lr_schedule_patience'],
                                                     verbose=True)

    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_AUCs, epoch_val_AUCs, epoch_test_AUCs = [], [], []

    from train.train_molhiv import train_epoch, evaluate_network

    train_loader = DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(valset, batch_size=params['batch_size'],  shuffle=False, collate_fn=dataset.collate)
    test_loader = DataLoader(testset, batch_size=params['batch_size'],  shuffle=False, collate_fn=dataset.collate)

    prev_lr = params['init_lr']

    # The checkpoint folder does not change between epochs, so build its path once.
    ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs'])) as t:
            for epoch in t:

                t.set_description('Epoch %d' % epoch)

                start = time.time()

                epoch_train_loss, epoch_train_auc, optimizer = train_epoch(model, optimizer, device, train_loader, epoch, net_params['LPE'])

                epoch_val_loss, epoch_val_auc = evaluate_network(model, device, val_loader, epoch, net_params['LPE'])
                _, epoch_test_auc = evaluate_network(model, device, test_loader, epoch, net_params['LPE'])

                epoch_train_losses.append(epoch_train_loss)
                epoch_val_losses.append(epoch_val_loss)
                epoch_train_AUCs.append(epoch_train_auc)
                epoch_val_AUCs.append(epoch_val_auc)
                epoch_test_AUCs.append(epoch_test_auc)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_auc', epoch_train_auc, epoch)
                writer.add_scalar('val/_auc', epoch_val_auc, epoch)
                writer.add_scalar('test/_auc', epoch_test_auc, epoch)
                writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], epoch)

                t.set_postfix(time=time.time()-start, lr=optimizer.param_groups[0]['lr'],
                              train_loss=epoch_train_loss, val_loss=epoch_val_loss,
                              train_AUC=epoch_train_auc, val_AUC=epoch_val_auc,
                              test_AUC=epoch_test_auc)

                per_epoch_time.append(time.time()-start)

                # Saving checkpoint
                os.makedirs(ckpt_dir, exist_ok=True)
                torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Keep only the checkpoints of the current and previous epoch;
                # the epoch number is the trailing "_<n>.pkl" of the file name.
                for ckpt_path in glob.glob(ckpt_dir + '/*.pkl'):
                    epoch_nb = ckpt_path.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch-1:
                        os.remove(ckpt_path)

                scheduler.step(epoch_val_loss)

                # Announce every learning-rate reduction made by the scheduler.
                current_lr = optimizer.param_groups[0]['lr']
                if current_lr < prev_lr:
                    print(f"Learning rate dropped to {current_lr}")
                prev_lr = current_lr

                if current_lr < params['min_lr']:
                    print("\n!! LR EQUAL TO MIN LR SET.")
                    break

                # Stop training after params['max_time'] hours
                if time.time()-t0 > params['max_time']*3600:
                    print('-' * 89)
                    print("Max_time for training elapsed {:.2f} hours, so stopping".format(params['max_time']))
                    break

    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early because of KeyboardInterrupt')

    if not epoch_val_AUCs:
        # Interrupted during the first epoch (or zero epochs requested): there
        # is no best epoch to pick and `epoch` may not even be bound, so the
        # summary below would crash. Release the writer and stop here.
        print("No epoch was completed, so there are no results to report.")
        writer.close()
        return

    #Return test and train metrics at best val metric
    index = epoch_val_AUCs.index(max(epoch_val_AUCs))

    test_auc = epoch_test_AUCs[index]
    train_auc = epoch_train_AUCs[index]

    print("Test AUC: {:.4f}".format(test_auc))
    print("Train AUC: {:.4f}".format(train_auc))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time()-t0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))

    writer.close()

    # Write the results in out_dir/results folder
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
    FINAL RESULTS\nTEST AUC: {:.4f}\nTRAIN AUC: {:.4f}\n\n
    Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_auc, train_auc, epoch, (time.time()-t0)/3600, np.mean(per_epoch_time)))


def main():
    """Read the JSON config, apply command-line overrides, and start training.

    Every option except ``--config`` is optional; a supplied option replaces
    the matching entry of the config file. Boolean options are true only for
    the exact string ``'True'``. All output paths of a run share one timestamp
    so that its log, checkpoint, result and config names always match.

    Raises:
        SystemExit: with status 1 if the resulting ``LPE`` setting is not one
            of 'node', 'edge' or 'none' (argparse also exits on bad usage).
    """
    parser = argparse.ArgumentParser()
    # Without a config file nothing can be loaded, so let argparse report it.
    parser.add_argument('--config', required=True, help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")
    for option in (
        'out_dir', 'seed', 'epochs', 'batch_size', 'init_lr', 'lr_reduce_factor',
        'lr_schedule_patience', 'min_lr', 'weight_decay', 'print_epoch_interval',
        'max_time',
        # Model details
        'full_graph', 'gamma', 'm',
        'LPE', 'LPE_layers', 'LPE_dim', 'LPE_n_heads',
        'GT_layers', 'GT_hidden_dim', 'GT_out_dim', 'GT_n_heads',
        'residual', 'readout', 'in_feat_dropout', 'dropout', 'layer_norm',
        'batch_norm',
    ):
        parser.add_argument('--' + option, help="Please give a value for " + option)
    args = parser.parse_args()

    def as_bool(text):
        # Only the literal string 'True' enables a flag.
        return text == 'True'

    def apply_overrides(target, casts):
        # Copy each option that was given on the command line into `target`,
        # converting it from its string form with the paired cast.
        for key, cast in casts:
            raw = getattr(args, key)
            if raw is not None:
                target[key] = cast(raw)

    with open(args.config) as f:
        config = json.load(f)

    # device
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # model, dataset, out_dir
    MODEL_NAME = args.model if args.model is not None else config['model']
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = LoadData(DATASET_NAME)
    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    apply_overrides(params, (
        ('seed', int), ('epochs', int), ('batch_size', int),
        ('init_lr', float), ('lr_reduce_factor', float),
        ('lr_schedule_patience', int), ('min_lr', float),
        ('weight_decay', float), ('print_epoch_interval', int),
        ('max_time', float),
    ))

    # model parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']

    apply_overrides(net_params, (
        ('full_graph', as_bool), ('gamma', float), ('m', int), ('LPE', str),
    ))

    if net_params['LPE'] not in ['node', 'edge', 'none']:
        print('[!] User did not provide a valid input argument for \'LPE\'. Valid inputs are \'node\', \'edge\', and \'none\'.')
        # Non-zero status so callers can tell the run was rejected.
        raise SystemExit(1)

    apply_overrides(net_params, (
        ('LPE_layers', int), ('LPE_dim', int), ('LPE_n_heads', int),
        ('GT_layers', int), ('GT_hidden_dim', int), ('GT_out_dim', int),
        ('GT_n_heads', int),
        ('residual', as_bool), ('readout', str),
        ('in_feat_dropout', float), ('dropout', float),
        ('layer_norm', as_bool), ('batch_norm', as_bool),
    ))

    # Take the timestamp once: formatting it separately per path could yield
    # different names for the same run if the clock ticks in between.
    run_name = "{}_{}_GPU{}_{}".format(MODEL_NAME, DATASET_NAME, config['gpu']['id'],
                                       time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y'))
    # os.path.join works whether or not out_dir ends with a path separator.
    root_log_dir = os.path.join(out_dir, 'logs', run_name)
    root_ckpt_dir = os.path.join(out_dir, 'checkpoints', run_name)
    write_file_name = os.path.join(out_dir, 'results', 'result_' + run_name)
    write_config_file = os.path.join(out_dir, 'configs', 'config_' + run_name)
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    os.makedirs(os.path.join(out_dir, 'results'), exist_ok=True)
    os.makedirs(os.path.join(out_dir, 'configs'), exist_ok=True)

    train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs)


# Launch training only when this file is executed as a script, so that merely
# importing the module does not start a run as a side effect.
if __name__ == "__main__":
    main()


================================================
FILE: main_molpcba.py
================================================
"""
    IMPORTING LIBS
"""
import dgl

import numpy as np
import os
import socket
import time
import random
import glob
import argparse, json
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils.data import DataLoader

from tensorboardX import SummaryWriter
from tqdm import tqdm


"""
    IMPORTING CUSTOM MODULES/METHODS
"""
from nets.molpcba.load_net import gnn_model
from data.data import LoadData

torch.set_default_dtype(torch.float32)

"""
    GPU Setup
"""
def gpu_setup(use_gpu, gpu_id):
    """
        Restrict the process to one GPU and choose the torch device.

        Exports CUDA_DEVICE_ORDER and CUDA_VISIBLE_DEVICES (set to `gpu_id`),
        then returns torch.device("cuda") when CUDA is available and `use_gpu`
        is truthy, and torch.device("cpu") in every other case.
    """
    os.environ.update({
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": str(gpu_id),
    })

    run_on_cuda = torch.cuda.is_available() and use_gpu
    if not run_on_cuda:
        # Also reached when CUDA exists but the caller asked not to use it.
        print('cuda not available')
        return torch.device("cpu")

    print('cuda available with GPU:',torch.cuda.get_device_name(0))
    return torch.device("cuda")


"""
    VIEWING ENCODING TYPE AND NUM PARAMS
"""
def view_model_param(LPE, net_params):
    """
        Instantiate the model once to count and report its parameters.

        Builds a model through gnn_model(LPE, net_params), sums the number of
        elements of every parameter tensor, prints the encoding type next to
        that total and returns the total.
    """
    model = gnn_model(LPE, net_params)
    print("MODEL DETAILS:\n")

    # Element count of each parameter tensor = product of its dimensions.
    total_param = sum(np.prod(list(weights.data.size())) for weights in model.parameters())

    # Any LPE value other than 'edge' / 'node' is reported as 'None'.
    encoding_names = {
        'edge': 'Edge Laplace Encoding/',
        'node': 'Node Laplace Encoding',
    }
    print('Encoding Type/Total parameters:', encoding_names.get(LPE, 'None'), total_param)
    return total_param


"""
    TRAINING CODE
"""

def train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs):
    """
        Train the model, evaluate it every epoch and write the final report.

        Steps, in order:
          1. Optional dataset preprocessing driven by net_params: Laplace
             decomposition ('LPE' is 'edge' or 'node'), full-graph connectivity
             ('full_graph' is truthy) and edge Laplace features ('LPE' is 'edge').
          2. Parameter counting (stored in net_params['total_param']) and a dump
             of the configuration to `<write_config_file>.txt`.
          3. Seeding, model / optimizer / scheduler / data-loader construction.
          4. The epoch loop: train, evaluate on val and test, log to
             tensorboard, save a checkpoint (only the two most recent are kept),
             step the LR scheduler, and stop early on min LR or max wall time.
          5. Reporting of the train/val/test AP at the epoch with the best
             validation AP, also written to `<write_file_name>.txt`.

        Args:
            MODEL_NAME: model name, only used in the text written to disk.
            dataset: dataset object; this function reads `name`, `train`, `val`,
                `test`, `collate` and calls `_laplace_decomp`,
                `_make_full_graph`, `_add_edge_laplace_feats` on it.
            params: optimisation settings; keys read here are 'seed',
                'init_lr', 'weight_decay', 'lr_reduce_factor',
                'lr_schedule_patience', 'batch_size', 'epochs',
                'batch_accumulation', 'min_lr' and 'max_time' (hours).
            net_params: model settings; keys read here are 'LPE', 'm',
                'full_graph' and 'device'. 'total_param' is added in place.
            dirs: tuple (root_log_dir, root_ckpt_dir, write_file_name,
                write_config_file).

        Raises:
            RuntimeError: if training ended before a single epoch completed,
                so there is no metric to report.
    """
    t0 = time.time()
    per_epoch_time = []

    DATASET_NAME = dataset.name

    if net_params['LPE'] in ['edge', 'node']:
        st = time.time()
        print("[!] Computing Laplace Decompositions..")
        dataset._laplace_decomp(net_params['m'])
        print('Time LapPE:',time.time()-st)

    if net_params['full_graph']:
        st = time.time()
        print("[!] Adding full graph connectivity..")
        dataset._make_full_graph()
        print('Time taken to convert to full graphs:',time.time()-st)

    if net_params['LPE'] == 'edge':
        st = time.time()
        print("[!] Computing edge Laplace features..")
        dataset._add_edge_laplace_feats()
        print('Time taken to compute edge Laplace features: ',time.time()-st)


    net_params['total_param'] = view_model_param(net_params['LPE'], net_params)

    trainset, valset, testset = dataset.train, dataset.val, dataset.test


    root_log_dir, root_ckpt_dir, write_file_name, write_config_file = dirs
    device = net_params['device']

    # Write the network and optimization hyper-parameters in folder config/
    with open(write_config_file + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n\nTotal Parameters: {}\n\n"""
                .format(DATASET_NAME, MODEL_NAME, params, net_params, net_params['total_param']))

    log_dir = os.path.join(root_log_dir, "RUN_" + str(0))
    writer = SummaryWriter(log_dir=log_dir)

    # setting seeds
    random.seed(params['seed'])
    np.random.seed(params['seed'])
    torch.manual_seed(params['seed'])
    if device.type == 'cuda':
        torch.cuda.manual_seed(params['seed'])

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))

    model = gnn_model(net_params['LPE'], net_params)
    model = model.to(device=device)

    optimizer = optim.Adam(model.parameters(), lr=params['init_lr'], weight_decay=params['weight_decay'])
    # mode='min' because the scheduler is stepped with the NEGATED validation AP below.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                     factor=params['lr_reduce_factor'],
                                                     patience=params['lr_schedule_patience'],
                                                     verbose=True)

    epoch_train_losses, epoch_val_losses = [], []
    epoch_train_APs, epoch_val_APs, epoch_test_APs = [], [], []


    from train.train_molpcba import train_epoch, evaluate_network


    train_loader = DataLoader(trainset, batch_size=params['batch_size'], shuffle=True, collate_fn=dataset.collate)
    val_loader = DataLoader(valset, batch_size=params['batch_size'],  shuffle=False, collate_fn=dataset.collate)
    test_loader = DataLoader(testset, batch_size=params['batch_size'],  shuffle=False, collate_fn=dataset.collate)

    ckpt_dir = os.path.join(root_ckpt_dir, "RUN_")

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        with tqdm(range(params['epochs'])) as t:
            for epoch in t:

                t.set_description('Epoch %d' % epoch)

                start = time.time()

                epoch_train_loss, epoch_train_ap, optimizer = train_epoch(model, optimizer, device, train_loader, epoch, net_params['LPE'], params["batch_accumulation"])

                epoch_val_loss, epoch_val_ap = evaluate_network(model, device, val_loader, epoch, net_params['LPE'])
                _, epoch_test_ap = evaluate_network(model, device, test_loader, epoch, net_params['LPE'])

                epoch_train_losses.append(epoch_train_loss)
                epoch_val_losses.append(epoch_val_loss)
                epoch_train_APs.append(epoch_train_ap)
                epoch_val_APs.append(epoch_val_ap)

                epoch_test_APs.append(epoch_test_ap)

                writer.add_scalar('train/_loss', epoch_train_loss, epoch)
                writer.add_scalar('val/_loss', epoch_val_loss, epoch)
                writer.add_scalar('train/_AP', epoch_train_ap, epoch)
                writer.add_scalar('val/_AP', epoch_val_ap, epoch)
                writer.add_scalar('test/_AP', epoch_test_ap, epoch)
                writer.add_scalar('learning_rate', optimizer.param_groups[0]['lr'], epoch)


                t.set_postfix(time=time.time()-start, lr=optimizer.param_groups[0]['lr'],
                              train_loss=epoch_train_loss, val_loss=epoch_val_loss,
                              train_AP=epoch_train_ap, val_AP=epoch_val_ap,
                              test_AP=epoch_test_ap)


                per_epoch_time.append(time.time()-start)

                # Saving checkpoint
                os.makedirs(ckpt_dir, exist_ok=True)
                torch.save(model.state_dict(), '{}.pkl'.format(ckpt_dir + "/epoch_" + str(epoch)))

                # Keep only the checkpoints of the current and previous epoch.
                files = glob.glob(ckpt_dir + '/*.pkl')
                for file in files:
                    epoch_nb = file.split('_')[-1]
                    epoch_nb = int(epoch_nb.split('.')[0])
                    if epoch_nb < epoch-1:
                        os.remove(file)

                # Higher AP is better, so feed the negated value to the 'min' scheduler.
                scheduler.step(-epoch_val_ap)

                if optimizer.param_groups[0]['lr'] < params['min_lr']:
                    print("\n!! LR EQUAL TO MIN LR SET.")
                    break

                # Stop training after params['max_time'] hours
                if time.time()-t0 > params['max_time']*3600:
                    print('-' * 89)
                    print("Max_time for training elapsed {:.2f} hours, so stopping".format(params['max_time']))
                    break

    except Exception as e: # Sometimes there's out of memory error after many epochs
        print('-' * 89)
        print(f'Exiting from training early Exception: {e}')

    except KeyboardInterrupt: # The user pressed Ctrl + C: stop training and report what we have
        print('-' * 89)
        print('Exiting from training keyboard interrupt')

    # Nothing can be reported if not even one epoch finished (epochs == 0, or
    # an error / Ctrl + C during the first epoch). Fail with an explicit
    # message instead of an unrelated error from max() on an empty list.
    if not epoch_val_APs:
        writer.close()
        raise RuntimeError("Training stopped before any epoch completed; there are no metrics to report.")

    #Return test and train metrics at best val metric
    index = epoch_val_APs.index(max(epoch_val_APs))

    test_ap = epoch_test_APs[index]
    val_ap = epoch_val_APs[index]
    train_ap = epoch_train_APs[index]

    print("Test AP: {:.4f}".format(test_ap))
    print("Val AP: {:.4f}".format(val_ap))
    print("Train AP: {:.4f}".format(train_ap))
    print("Best epoch index: {:.4f}".format(index))
    print("Convergence Time (Epochs): {:.4f}".format(epoch))
    print("TOTAL TIME TAKEN: {:.4f}s".format(time.time()-t0))
    print("AVG TIME PER EPOCH: {:.4f}s".format(np.mean(per_epoch_time)))

    writer.close()

    # Write the results in out_dir/results folder
    with open(write_file_name + '.txt', 'w') as f:
        f.write("""Dataset: {},\nModel: {}\n\nparams={}\n\nnet_params={}\n\n{}\n\nTotal Parameters: {}\n\n
    FINAL RESULTS\nTEST AP: {:.4f}\nTRAIN AP: {:.4f}\n\n
    Convergence Time (Epochs): {:.4f}\nTotal Time Taken: {:.4f} hrs\nAverage Time Per Epoch: {:.4f} s\n\n\n"""\
          .format(DATASET_NAME, MODEL_NAME, params, net_params, model, net_params['total_param'],
                  test_ap, train_ap, epoch, (time.time()-t0)/3600, np.mean(per_epoch_time)))


def _cli_bool(value):
    """Return True only for the exact string 'True' (the boolean convention of this CLI)."""
    return value == 'True'


def _cli_bool_or_raw(value):
    """Convert the strings 'True'/'False' to booleans; pass any other value through unchanged."""
    if value in ('True', 'False'):
        return value == 'True'
    return value


def _override_from_cli(target, args, casts):
    """Copy each CLI option that was actually given into `target`, converted by its cast."""
    for option, cast in casts:
        value = getattr(args, option)
        if value is not None:
            target[option] = cast(value)


def main():
    """
        USER CONTROLS

        Parse the command line, load the JSON config file and let every
        command-line option that was given override the matching config
        entry. Then select the device, load the dataset, prepare the output
        locations and launch train_val_pipeline().

        Boolean options are passed as the strings 'True' / 'False'; any other
        string counts as False (except for --extra_mlp, see below).
        Exits with status 1 when 'LPE' is not one of 'node', 'edge', 'none'.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default="configs/MOLPCBA/optimized", help="Please give a config.json file with training/model/data/param details")
    parser.add_argument('--gpu_id', help="Please give a value for gpu id")
    parser.add_argument('--model', help="Please give a value for model name")
    parser.add_argument('--dataset', help="Please give a value for dataset name")

    # Every remaining option shares the same help pattern. Listing them once
    # also keeps each help text in sync with its option name (the help of
    # --residual used to say "readout").
    plain_options = (
        'out_dir', 'seed', 'epochs', 'batch_size', 'init_lr', 'lr_reduce_factor',
        'lr_schedule_patience', 'min_lr', 'weight_decay', 'print_epoch_interval',
        'max_time',
        # Model details
        'full_graph', 'gamma', 'm',
        'LPE', 'LPE_layers', 'LPE_dim', 'LPE_n_heads',
        'extra_mlp',
        'GT_layers', 'GT_hidden_dim', 'GT_out_dim', 'GT_n_heads',
        'residual', 'readout', 'in_feat_dropout', 'dropout', 'layer_norm', 'batch_norm',
    )
    for option in plain_options:
        parser.add_argument('--' + option, help="Please give a value for " + option)

    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)

    # device: giving --gpu_id also forces GPU usage on
    if args.gpu_id is not None:
        config['gpu']['id'] = int(args.gpu_id)
        config['gpu']['use'] = True
    device = gpu_setup(config['gpu']['use'], config['gpu']['id'])

    # model, dataset, out_dir
    MODEL_NAME = args.model if args.model is not None else config['model']
    DATASET_NAME = args.dataset if args.dataset is not None else config['dataset']
    dataset = LoadData(DATASET_NAME)

    out_dir = args.out_dir if args.out_dir is not None else config['out_dir']

    # parameters
    params = config['params']
    _override_from_cli(params, args, (
        ('seed', int),
        ('epochs', int),
        ('batch_size', int),
        ('init_lr', float),
        ('lr_reduce_factor', float),
        ('lr_schedule_patience', int),
        ('min_lr', float),
        ('weight_decay', float),
        ('print_epoch_interval', int),
        ('max_time', float),
    ))

    # model parameters
    net_params = config['net_params']
    net_params['device'] = device
    net_params['gpu_id'] = config['gpu']['id']
    net_params['batch_size'] = params['batch_size']

    # NOTE(review): 'extra_mlp' used to be stored as the raw string, so
    # "--extra_mlp False" produced a truthy value. It is now parsed like the
    # other boolean flags; this assumes the model treats it as a boolean --
    # confirm against nets/molpcba.
    _override_from_cli(net_params, args, (
        ('full_graph', _cli_bool),
        ('gamma', float),
        ('m', int),
        ('LPE', str),
        ('extra_mlp', _cli_bool_or_raw),
    ))

    if net_params['LPE'] not in ['node', 'edge', 'none']:
        print('[!] User did not provide a valid input argument for \'LPE\'. Valid inputs are \'node\', \'edge\', and \'none\'.')
        # Non-zero status so that calling scripts can detect the failure.
        raise SystemExit(1)

    _override_from_cli(net_params, args, (
        ('LPE_layers', int),
        ('LPE_dim', int),
        ('LPE_n_heads', int),
        ('GT_layers', int),
        ('GT_hidden_dim', int),
        ('GT_out_dim', int),
        ('GT_n_heads', int),
        ('residual', _cli_bool),
        ('readout', str),
        ('in_feat_dropout', float),
        ('dropout', float),
        ('layer_norm', _cli_bool),
        ('batch_norm', _cli_bool),
    ))

    # Build the run tag ONCE: evaluating time.strftime() separately for each
    # path could give the four outputs of one run different timestamps.
    run_tag = MODEL_NAME + "_" + DATASET_NAME + "_GPU" + str(config['gpu']['id']) + "_" + time.strftime('%Hh%Mm%Ss_on_%b_%d_%Y')
    root_log_dir = out_dir + 'logs/' + run_tag
    root_ckpt_dir = out_dir + 'checkpoints/' + run_tag
    write_file_name = out_dir + 'results/result_' + run_tag
    write_config_file = out_dir + 'configs/config_' + run_tag
    dirs = root_log_dir, root_ckpt_dir, write_file_name, write_config_file

    os.makedirs(out_dir + 'results', exist_ok=True)
    os.makedirs(out_dir + 'configs', exist_ok=True)

    train_val_pipeline(MODEL_NAME, dataset, params, net_params, dirs)

# Launch the experiment only when this file is executed as a script, so that
# importing the module does not start a training run.
if __name__ == "__main__":
    main()


================================================
FILE: misc/download_datasets.md
================================================
# Download datasets

All the datasets work with DGL 0.5.x or later. Please update the environment using the yml files in the root directory if using these datasets throws errors.


<br>

## 1. ZINC molecular dataset
ZINC size is 58.9MB.  

```
# At the root of the project
cd data/ 
bash script_download_molecules.sh
```
Script [script_download_molecules.sh](../data/script_download_molecules.sh) is located here. Refer to [benchmarking-gnns repo](https://github.com/graphdeeplearning/benchmarking-gnns) for details on preparation.


<br>

## 2. PATTERN/CLUSTER SBM datasets
PATTERN size is 1.98GB and CLUSTER size is 1.26GB.

```
# At the root of the project
cd data/ 
bash script_download_SBMs.sh
```
Script [script_download_SBMs.sh](../data/script_download_SBMs.sh) is located here. Refer to [benchmarking-gnns repo](https://github.com/graphdeeplearning/benchmarking-gnns) for details on preparation.

<br>

## 3. All BGNN datasets

```
# At the root of the project
cd data/ 
bash script_download_all_datasets.sh
```

Script [script_download_all_datasets.sh](../data/script_download_all_datasets.sh) is located here. 

<br>

## 4. MolHIV OGB dataset

```
# Ensure OGB is installed: 
pip install ogb
```
If properly installed, the dataset will automatically be downloaded and saved to the ```dataset/``` folder after running a MolHIV experiment.

<br><br><br>


================================================
FILE: misc/env_installation.md
================================================
# Benchmark installation


<br>

## 1. Setup Conda

```
# Conda installation

# For Linux
curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh

# For OSX
curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh

chmod +x ~/miniconda.sh    
./miniconda.sh  

source ~/.bashrc          # For Linux
source ~/.bash_profile    # For OSX
```


<br>

## 2. Setup Python environment for CPU

```
# Clone GitHub repo
conda install git
git clone https://github.com/DevinKreuzer/SAN.git
cd SAN

# Install python environment
# using pip
pip install -r requirements.txt

# using Conda
conda create --name <env_name> --file requirements.txt
```


<br>

## 3. Setup Python environment for GPU

DGL 0.5.x requires CUDA **10.2**.

For Ubuntu **18.04**

```
# Setup CUDA 10.2 on Ubuntu 18.04
sudo apt-get --purge remove "*cublas*" "cuda*"
sudo apt --purge remove "nvidia*"
sudo apt autoremove
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.2.89-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu1804_10.2.89-1_amd64.deb
sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
sudo apt update
sudo apt install -y cuda-10-2
sudo reboot
cat /usr/local/cuda/version.txt # Check CUDA version is 10.2

# Clone GitHub repo
conda install git
git clone https://github.com/DevinKreuzer/SAN.git
cd SAN

# Install python environment
# using pip
pip install -r requirements.txt

# using Conda
conda create --name <env_name> --file requirements.txt
```


<br><br><br>


================================================
FILE: nets/SBMs_node_classification/SAN.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import numpy as np

"""
    Graph Transformer
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN(nn.Module):
    """
        Graph Transformer for SBM node classification without Laplace
        positional encodings.

        Node and edge inputs are integer ids that are embedded, passed through
        a stack of GraphTransformerLayer modules and finally mapped to class
        scores per node by an MLP readout.

        Keys read from net_params: 'in_dim', 'n_classes', 'full_graph',
        'gamma', 'GT_layers', 'GT_hidden_dim', 'GT_out_dim', 'GT_n_heads',
        'residual', 'readout', 'in_feat_dropout', 'dropout', 'layer_norm',
        'batch_norm', 'device'.
    """

    def __init__(self, net_params):
        super().__init__()


        in_dim_node = net_params['in_dim'] # node_dim (feat is an integer)
        self.n_classes = net_params['n_classes']
        
        full_graph = net_params['full_graph']
        gamma = net_params['gamma']
        
        GT_layers = net_params['GT_layers']
        GT_hidden_dim = net_params['GT_hidden_dim']
        GT_out_dim = net_params['GT_out_dim']
        GT_n_heads = net_params['GT_n_heads']
        
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        in_feat_dropout = net_params['in_feat_dropout']
        dropout = net_params['dropout']

        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']

        self.device = net_params['device']
        self.in_feat_dropout = nn.Dropout(in_feat_dropout)
        
        self.embedding_h = nn.Embedding(in_dim_node, GT_hidden_dim)
        # Edge inputs take one of two ids.
        self.embedding_e = nn.Embedding(2, GT_hidden_dim)

        # GT_layers-1 hidden->hidden layers followed by one hidden->out layer.
        self.layers = nn.ModuleList([ GraphTransformerLayer(gamma, GT_hidden_dim, GT_hidden_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual) for _ in range(GT_layers-1) ])
        
        self.layers.append(GraphTransformerLayer(gamma, GT_hidden_dim, GT_out_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual))

        self.MLP_layer = MLPReadout(GT_out_dim, self.n_classes)


    def forward(self, g, h, e):
        """
            Compute per-node class scores.

            g is handed unchanged to every GraphTransformerLayer; h and e are
            the integer node / edge ids looked up in the embedding tables.
        """
        
        # input embedding
        h = self.embedding_h(h)
        h = self.in_feat_dropout(h)
        e = self.embedding_e(e)
        
        # GraphTransformer Layers
        for conv in self.layers:
            h, e = conv(g, h, e)
            
        # output
        h_out = self.MLP_layer(h)

        return h_out
    
    
    def loss(self, pred, label):
        """
            Class-balanced cross-entropy between `pred` and `label`.

            With V labels in total, a class holding n of them gets the weight
            (V - n) / V; classes that do not occur in `label` get weight 0.
        """

        # calculating label weights for weighted loss computation
        V = label.size(0)
        # One count per class, created directly on the device of `label`, so
        # the loss does not depend on the device recorded at construction time.
        cluster_sizes = torch.bincount(label, minlength=self.n_classes)
        weight = (V - cluster_sizes).float() / V
        weight *= (cluster_sizes>0).float()
        
        # weighted cross-entropy for unbalanced classes
        criterion = nn.CrossEntropyLoss(weight=weight)
        loss = criterion(pred, label)

        return loss


================================================
FILE: nets/SBMs_node_classification/SAN_EdgeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import numpy as np

"""
    Graph Transformer
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN_EdgeLPE(nn.Module):
    """
        Graph Transformer for SBM node classification with a learned Laplace
        positional encoding (LPE) attached to the EDGE features.

        The edge embedding uses GT_hidden_dim - LPE_dim dimensions; the
        remaining LPE_dim dimensions are filled with the output of a small
        Transformer encoder run over the per-edge eigen features.

        Keys read from net_params: 'in_dim', 'n_classes', 'full_graph',
        'gamma', 'LPE_layers', 'LPE_dim', 'LPE_n_heads', 'GT_layers',
        'GT_hidden_dim', 'GT_out_dim', 'GT_n_heads', 'residual', 'readout',
        'in_feat_dropout', 'dropout', 'layer_norm', 'batch_norm', 'device'.
    """

    def __init__(self, net_params):
        super().__init__()

        in_dim_node = net_params['in_dim'] # node_dim (feat is an integer)
        self.n_classes = net_params['n_classes']
        
        full_graph = net_params['full_graph']
        gamma = net_params['gamma']
        
        LPE_layers = net_params['LPE_layers']
        LPE_dim = net_params['LPE_dim']
        LPE_n_heads = net_params['LPE_n_heads']
        
        GT_layers = net_params['GT_layers']
        GT_hidden_dim = net_params['GT_hidden_dim']
        GT_out_dim = net_params['GT_out_dim']
        GT_n_heads = net_params['GT_n_heads']
        
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        in_feat_dropout = net_params['in_feat_dropout']
        dropout = net_params['dropout']

        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']

        self.device = net_params['device']
        self.in_feat_dropout = nn.Dropout(in_feat_dropout)
        
        self.embedding_h = nn.Embedding(in_dim_node, GT_hidden_dim)
        self.embedding_e = nn.Embedding(2, GT_hidden_dim-LPE_dim)#Remove some embedding dimensions to make room for concatenating laplace encoding
        # Lifts the 3 stacked inputs (diff, product, EigVals) to LPE_dim.
        self.linear_A = nn.Linear(3, LPE_dim)
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=LPE_dim, nhead=LPE_n_heads)
        self.PE_Transformer = nn.TransformerEncoder(encoder_layer, num_layers=LPE_layers)
        
        # GT_layers-1 hidden->hidden layers followed by one hidden->out layer.
        self.layers = nn.ModuleList([ GraphTransformerLayer(gamma, GT_hidden_dim, GT_hidden_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual) for _ in range(GT_layers-1) ])
        
        self.layers.append(GraphTransformerLayer(gamma, GT_hidden_dim, GT_out_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual))
        self.MLP_layer = MLPReadout(GT_out_dim, self.n_classes)


    def forward(self, g, h, e, diff, product, EigVals):
        """
            Compute per-node class scores.

            diff, product and EigVals are concatenated along dim 2 to form the
            per-edge positional input; NaN entries mark missing eigenvectors
            and are excluded from the learned encoding.
        """
        
        # input embedding
        h = self.embedding_h(h)
        h = self.in_feat_dropout(h)
        e = self.embedding_e(e)        
          
        PosEnc = torch.cat((diff, product, EigVals), 2) # (Num edges) x (Num Eigenvectors) x 3
        empty_mask = torch.isnan(PosEnc) # (Num edges) x (Num Eigenvectors) x 3
        # Zero the NaN slots so they can go through the linear layer; they are
        # hidden from attention through the padding mask below.
        PosEnc[empty_mask] = 0 # (Num edges) x (Num Eigenvectors) x 3

        PosEnc = torch.transpose(PosEnc, 0 ,1).float() # (Num Eigenvectors) x (Num edges) x 3
        PosEnc = self.linear_A(PosEnc) # (Num Eigenvectors) x (Num edges) x PE_dim
            
        #1st Transformer: Learned PE
        # The eigenvectors form the sequence; the mask is taken from the first
        # of the 3 input features only.
        PosEnc = self.PE_Transformer(src=PosEnc, src_key_padding_mask=empty_mask[:,:,0]) 
        
        #remove masked sequences
        PosEnc[torch.transpose(empty_mask, 0 ,1)[:,:,0]] = float('nan') 

        #Sum pooling
        # nansum skips the positions that were just set to NaN.
        PosEnc = torch.nansum(PosEnc, 0, keepdim=False) # (Num edge) x PE_dim

        #Concatenate learned PE to input embedding
        e = torch.cat((e, PosEnc), 1)
        
        
        # GraphTransformer Layers
        for conv in self.layers:
            h, e = conv(g, h, e)
            
        # output
        h_out = self.MLP_layer(h)

        return h_out
    
    
    def loss(self, pred, label):
        """
            Class-balanced cross-entropy between `pred` and `label`.

            With V labels in total, a class holding n of them gets the weight
            (V - n) / V; classes that do not occur in `label` get weight 0.
        """

        # calculating label weights for weighted loss computation
        V = label.size(0)
        # One count per class, created directly on the device of `label`, so
        # the loss does not depend on the device recorded at construction time.
        cluster_sizes = torch.bincount(label, minlength=self.n_classes)
        weight = (V - cluster_sizes).float() / V
        weight *= (cluster_sizes>0).float()
        
        # weighted cross-entropy for unbalanced classes
        criterion = nn.CrossEntropyLoss(weight=weight)
        loss = criterion(pred, label)

        return loss


================================================
FILE: nets/SBMs_node_classification/SAN_NodeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import numpy as np

"""
    Graph Transformer
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN_NodeLPE(nn.Module):
    """
        Graph Transformer for SBM node classification with a learned Laplace
        positional encoding (LPE) attached to the NODE features.

        The node embedding uses GT_hidden_dim - LPE_dim dimensions; the
        remaining LPE_dim dimensions are filled with the output of a small
        Transformer encoder run over the per-node eigen features.

        Keys read from net_params: 'in_dim', 'n_classes', 'full_graph',
        'gamma', 'LPE_layers', 'LPE_dim', 'LPE_n_heads', 'GT_layers',
        'GT_hidden_dim', 'GT_out_dim', 'GT_n_heads', 'residual', 'readout',
        'in_feat_dropout', 'dropout', 'layer_norm', 'batch_norm', 'device'.
    """

    def __init__(self, net_params):
        super().__init__()

        in_dim_node = net_params['in_dim'] # node_dim (feat is an integer)
        self.n_classes = net_params['n_classes']
        
        full_graph = net_params['full_graph']
        gamma = net_params['gamma']
        
        LPE_layers = net_params['LPE_layers']
        LPE_dim = net_params['LPE_dim']
        LPE_n_heads = net_params['LPE_n_heads']
        
        GT_layers = net_params['GT_layers']
        GT_hidden_dim = net_params['GT_hidden_dim']
        GT_out_dim = net_params['GT_out_dim']
        GT_n_heads = net_params['GT_n_heads']
        
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        in_feat_dropout = net_params['in_feat_dropout']
        dropout = net_params['dropout']

        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']

        self.device = net_params['device']
        self.in_feat_dropout = nn.Dropout(in_feat_dropout)
        
        self.embedding_h = nn.Embedding(in_dim_node, GT_hidden_dim-LPE_dim)#Remove some embedding dimensions to make room for concatenating laplace encoding
        self.embedding_e = nn.Embedding(2, GT_hidden_dim)
        # Lifts the 2 stacked inputs (EigVecs, EigVals) to LPE_dim.
        self.linear_A = nn.Linear(2, LPE_dim)
        
        encoder_layer = nn.TransformerEncoderLayer(d_model=LPE_dim, nhead=LPE_n_heads)
        self.PE_Transformer = nn.TransformerEncoder(encoder_layer, num_layers=LPE_layers)
        
        # GT_layers-1 hidden->hidden layers followed by one hidden->out layer.
        self.layers = nn.ModuleList([ GraphTransformerLayer(gamma, GT_hidden_dim, GT_hidden_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual) for _ in range(GT_layers-1) ])
        
        self.layers.append(GraphTransformerLayer(gamma, GT_hidden_dim, GT_out_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual))

        self.MLP_layer = MLPReadout(GT_out_dim, self.n_classes)


    def forward(self, g, h, e, EigVecs, EigVals):
        """
            Compute per-node class scores.

            EigVecs (with a trailing axis added) and EigVals are concatenated
            along dim 2 to form the per-node positional input; NaN entries
            mark missing eigenvectors and are excluded from the learned
            encoding. Input dropout is applied after the encoding has been
            concatenated to the node embedding.
        """
        
        # input embedding
        h = self.embedding_h(h)
        e = self.embedding_e(e) 
          
        PosEnc = torch.cat((EigVecs.unsqueeze(2), EigVals), dim=2).float() # (Num nodes) x (Num Eigenvectors) x 2
        empty_mask = torch.isnan(PosEnc) # (Num nodes) x (Num Eigenvectors) x 2
        
        # Zero the NaN slots so they can go through the linear layer; they are
        # hidden from attention through the padding mask below.
        PosEnc[empty_mask] = 0 # (Num nodes) x (Num Eigenvectors) x 2
        PosEnc = torch.transpose(PosEnc, 0 ,1).float() # (Num Eigenvectors) x (Num nodes) x 2
        PosEnc = self.linear_A(PosEnc) # (Num Eigenvectors) x (Num nodes) x PE_dim
        
        
        #1st Transformer: Learned PE
        # The eigenvectors form the sequence; the mask is taken from the first
        # of the 2 input features only.
        PosEnc = self.PE_Transformer(src=PosEnc, src_key_padding_mask=empty_mask[:,:,0]) 
        
        #remove masked sequences
        PosEnc[torch.transpose(empty_mask, 0 ,1)[:,:,0]] = float('nan') 
        
        #Sum pooling
        # nansum skips the positions that were just set to NaN.
        PosEnc = torch.nansum(PosEnc, 0, keepdim=False)
        
        #Concatenate learned PE to input embedding
        h = torch.cat((h, PosEnc), 1)
        
        h = self.in_feat_dropout(h)
        
        # GraphTransformer Layers
        for conv in self.layers:
            h, e = conv(g, h, e)
            
        # output
        h_out = self.MLP_layer(h)

        return h_out
    
    
    def loss(self, pred, label):
        """
            Class-balanced cross-entropy between `pred` and `label`.

            With V labels in total, a class holding n of them gets the weight
            (V - n) / V; classes that do not occur in `label` get weight 0.
        """

        # calculating label weights for weighted loss computation
        V = label.size(0)
        # One count per class, created directly on the device of `label`, so
        # the loss does not depend on the device recorded at construction time.
        cluster_sizes = torch.bincount(label, minlength=self.n_classes)
        weight = (V - cluster_sizes).float() / V
        weight *= (cluster_sizes>0).float()
        
        # weighted cross-entropy for unbalanced classes
        criterion = nn.CrossEntropyLoss(weight=weight)
        loss = criterion(pred, label)

        return loss


================================================
FILE: nets/SBMs_node_classification/load_net.py
================================================
"""
    Utility file to select GraphNN model as
    selected by the user
"""

from nets.SBMs_node_classification.SAN_NodeLPE import SAN_NodeLPE
from nets.SBMs_node_classification.SAN_EdgeLPE import SAN_EdgeLPE
from nets.SBMs_node_classification.SAN import SAN


def NodeLPE(net_params):
    """Build a SAN_NodeLPE network from `net_params`."""
    network = SAN_NodeLPE(net_params)
    return network

def EdgeLPE(net_params):
    """Build a SAN_EdgeLPE network from `net_params`."""
    network = SAN_EdgeLPE(net_params)
    return network

def NoLPE(net_params):
    """Build a plain SAN network (no Laplace positional encoding) from `net_params`."""
    network = SAN(net_params)
    return network

def gnn_model(LPE, net_params):
    """
        Build the network selected by `LPE`.

        `LPE` must be one of 'edge', 'node' or 'none'; any other key raises
        KeyError. `net_params` is forwarded unchanged to the chosen builder.
    """
    builders = dict(edge=EdgeLPE, node=NodeLPE, none=NoLPE)
    build = builders[LPE]
    return build(net_params)

================================================
FILE: nets/ZINC_graph_regression/SAN.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import numpy as np

"""
    Graph Transformer with edge features
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN(nn.Module):
    """
        Graph Transformer regressor without a learned positional encoding.

        Atom and bond type indices are embedded, refined by a stack of
        GraphTransformerLayer blocks, pooled over the nodes of each graph and
        mapped to one value per graph by an MLPReadout.
    """

    def __init__(self, net_params):
        """
            Build the model from the `net_params` dictionary.

            Keys read: num_atom_type, num_bond_type, full_graph, gamma,
            GT_layers, GT_hidden_dim, GT_out_dim, GT_n_heads, residual,
            readout, in_feat_dropout, dropout, layer_norm, batch_norm, device.
        """
        super().__init__()

        n_atom_types = net_params['num_atom_type']
        n_bond_types = net_params['num_bond_type']
        use_full_graph = net_params['full_graph']
        gamma = net_params['gamma']
        depth = net_params['GT_layers']
        hidden_dim = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        n_heads = net_params['GT_n_heads']
        feat_drop = net_params['in_feat_dropout']
        layer_drop = net_params['dropout']

        # Options kept on the instance
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        self.in_feat_dropout = nn.Dropout(feat_drop)
        self.embedding_h = nn.Embedding(n_atom_types, hidden_dim)
        self.embedding_e = nn.Embedding(n_bond_types, hidden_dim)

        # The first depth-1 layers receive GT_hidden_dim twice; the last one
        # receives GT_out_dim as its second size argument.
        widths = [hidden_dim] * (depth - 1) + [out_dim]
        self.layers = nn.ModuleList([
            GraphTransformerLayer(gamma, hidden_dim, width, n_heads, use_full_graph, layer_drop,
                                  self.layer_norm, self.batch_norm, self.residual)
            for width in widths
        ])
        self.MLP_layer = MLPReadout(out_dim, 1)   # 1 out dim since regression problem

    def forward(self, g, h, e):
        """
            Run the model on graph `g` with node indices `h` and edge indices `e`.

            Stores the final node features in g.ndata['h'] and returns the
            MLP readout of the pooled graph representation.
        """
        # input embedding
        node_feat = self.in_feat_dropout(self.embedding_h(h))
        edge_feat = self.embedding_e(e)

        # GNN
        for layer in self.layers:
            node_feat, edge_feat = layer(g, node_feat, edge_feat)
        g.ndata['h'] = node_feat

        # "sum" and "max" select their pooling; anything else means mean pooling
        if self.readout == "sum":
            pool = dgl.sum_nodes
        elif self.readout == "max":
            pool = dgl.max_nodes
        else:
            pool = dgl.mean_nodes

        return self.MLP_layer(pool(g, 'h'))

    def loss(self, scores, targets):
        """Mean absolute error (L1) between `scores` and `targets`."""
        return F.l1_loss(scores, targets)


================================================
FILE: nets/ZINC_graph_regression/SAN_EdgeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import numpy as np

"""
    Graph Transformer with edge features
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN_EdgeLPE(nn.Module):
    """
        Graph Transformer regressor with an edge-level learned positional
        encoding (LPE).

        Per-edge (diff, product, eigenvalue) triples are projected, processed
        by a TransformerEncoder, sum-pooled over the eigenvector axis and
        concatenated to the bond embedding before the GraphTransformerLayer
        stack.
    """

    def __init__(self, net_params):
        """
            Build the model from the `net_params` dictionary.

            Keys read: num_atom_type, num_bond_type, full_graph, gamma,
            LPE_layers, LPE_dim, LPE_n_heads, GT_layers, GT_hidden_dim,
            GT_out_dim, GT_n_heads, residual, readout, in_feat_dropout,
            dropout, layer_norm, batch_norm, device.
        """
        super().__init__()

        n_atom_types = net_params['num_atom_type']
        n_bond_types = net_params['num_bond_type']
        use_full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        pe_depth = net_params['LPE_layers']
        pe_dim = net_params['LPE_dim']
        pe_heads = net_params['LPE_n_heads']

        depth = net_params['GT_layers']
        hidden_dim = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        n_heads = net_params['GT_n_heads']

        feat_drop = net_params['in_feat_dropout']
        layer_drop = net_params['dropout']

        # Options kept on the instance
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        self.in_feat_dropout = nn.Dropout(feat_drop)

        self.embedding_h = nn.Embedding(n_atom_types, hidden_dim)
        # Bond embedding is narrower by LPE_dim so that the concatenated
        # laplace encoding brings edge features back to GT_hidden_dim.
        self.embedding_e = nn.Embedding(n_bond_types, hidden_dim - pe_dim)
        self.linear_A = nn.Linear(3, pe_dim)

        pe_block = nn.TransformerEncoderLayer(d_model=pe_dim, nhead=pe_heads)
        self.PE_Transformer = nn.TransformerEncoder(pe_block, num_layers=pe_depth)

        # The first depth-1 layers receive GT_hidden_dim twice; the last one
        # receives GT_out_dim as its second size argument.
        widths = [hidden_dim] * (depth - 1) + [out_dim]
        self.layers = nn.ModuleList([
            GraphTransformerLayer(gamma, hidden_dim, width, n_heads, use_full_graph, layer_drop,
                                  self.layer_norm, self.batch_norm, self.residual)
            for width in widths
        ])
        self.MLP_layer = MLPReadout(out_dim, 1)   # 1 out dim since regression problem

    def forward(self, g, h, e, diff, product, EigVals):
        """
            Run the model on graph `g`.

            `h` / `e` are node / edge indices for the embeddings; `diff`,
            `product` and `EigVals` are concatenated along dim 2 to form the
            raw edge positional encoding, in which NaN marks empty entries.
            Stores the final node features in g.ndata['h'] and returns the
            MLP readout of the pooled graph representation.
        """
        # input embedding
        node_feat = self.in_feat_dropout(self.embedding_h(h))
        edge_feat = self.embedding_e(e)

        pe = torch.cat((diff, product, EigVals), 2)  # (Num edges) x (Num Eigenvectors) x 3
        nan_mask = torch.isnan(pe)                   # same shape as pe
        pe[nan_mask] = 0
        pad_mask = nan_mask[:, :, 0]                 # (Num edges) x (Num Eigenvectors)

        pe = self.linear_A(pe.transpose(0, 1).float())  # (Num Eigenvectors) x (Num edges) x PE_dim

        # 1st Transformer: Learned PE
        pe = self.PE_Transformer(src=pe, src_key_padding_mask=pad_mask)

        # Masked positions become NaN so that nansum ignores them
        pe[pad_mask.transpose(0, 1)] = float('nan')

        # Sum pooling over the eigenvector axis
        pe = torch.nansum(pe, 0, keepdim=False)      # (Num edges) x PE_dim

        # Concatenate learned PE to input embedding
        edge_feat = torch.cat((edge_feat, pe), 1)

        # GNN
        for layer in self.layers:
            node_feat, edge_feat = layer(g, node_feat, edge_feat)
        g.ndata['h'] = node_feat

        # "sum" and "max" select their pooling; anything else means mean pooling
        if self.readout == "sum":
            pool = dgl.sum_nodes
        elif self.readout == "max":
            pool = dgl.max_nodes
        else:
            pool = dgl.mean_nodes

        return self.MLP_layer(pool(g, 'h'))

    def loss(self, scores, targets):
        """Mean absolute error (L1) between `scores` and `targets`."""
        return F.l1_loss(scores, targets)


================================================
FILE: nets/ZINC_graph_regression/SAN_NodeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import numpy as np

"""
    Graph Transformer with edge features
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN_NodeLPE(nn.Module):
    """
        Graph Transformer regressor with a node-level learned positional
        encoding (LPE).

        Per-node (eigenvector entry, eigenvalue) pairs are projected,
        processed by a TransformerEncoder, sum-pooled over the eigenvector
        axis and concatenated to the atom embedding before the
        GraphTransformerLayer stack.
    """

    def __init__(self, net_params):
        """
            Build the model from the `net_params` dictionary.

            Keys read: num_atom_type, num_bond_type, full_graph, gamma,
            LPE_layers, LPE_dim, LPE_n_heads, GT_layers, GT_hidden_dim,
            GT_out_dim, GT_n_heads, residual, readout, in_feat_dropout,
            dropout, layer_norm, batch_norm, device.
        """
        super().__init__()

        n_atom_types = net_params['num_atom_type']
        n_bond_types = net_params['num_bond_type']
        use_full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        pe_depth = net_params['LPE_layers']
        pe_dim = net_params['LPE_dim']
        pe_heads = net_params['LPE_n_heads']

        depth = net_params['GT_layers']
        hidden_dim = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        n_heads = net_params['GT_n_heads']

        feat_drop = net_params['in_feat_dropout']
        layer_drop = net_params['dropout']

        # Options kept on the instance
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        self.in_feat_dropout = nn.Dropout(feat_drop)

        # Atom embedding is narrower by LPE_dim so that the concatenated
        # laplace encoding brings node features back to GT_hidden_dim.
        self.embedding_h = nn.Embedding(n_atom_types, hidden_dim - pe_dim)
        self.embedding_e = nn.Embedding(n_bond_types, hidden_dim)
        self.linear_A = nn.Linear(2, pe_dim)

        pe_block = nn.TransformerEncoderLayer(d_model=pe_dim, nhead=pe_heads)
        self.PE_Transformer = nn.TransformerEncoder(pe_block, num_layers=pe_depth)

        # The first depth-1 layers receive GT_hidden_dim twice; the last one
        # receives GT_out_dim as its second size argument.
        widths = [hidden_dim] * (depth - 1) + [out_dim]
        self.layers = nn.ModuleList([
            GraphTransformerLayer(gamma, hidden_dim, width, n_heads, use_full_graph, layer_drop,
                                  self.layer_norm, self.batch_norm, self.residual)
            for width in widths
        ])
        self.MLP_layer = MLPReadout(out_dim, 1)   # 1 out dim since regression problem

    def forward(self, g, h, e, EigVecs, EigVals):
        """
            Run the model on graph `g`.

            `h` / `e` are node / edge indices for the embeddings; `EigVecs`
            (unsqueezed on dim 2) and `EigVals` are concatenated to form the
            raw node positional encoding, in which NaN marks empty entries.
            Stores the final node features in g.ndata['h'] and returns the
            MLP readout of the pooled graph representation.
        """
        # input embedding
        node_feat = self.embedding_h(h)
        edge_feat = self.embedding_e(e)

        pe = torch.cat((EigVecs.unsqueeze(2), EigVals), dim=2).float()  # (Num nodes) x (Num Eigenvectors) x 2
        nan_mask = torch.isnan(pe)                                      # same shape as pe
        pe[nan_mask] = 0
        pad_mask = nan_mask[:, :, 0]                                    # (Num nodes) x (Num Eigenvectors)

        pe = self.linear_A(pe.transpose(0, 1))  # (Num Eigenvectors) x (Num nodes) x PE_dim

        # 1st Transformer: Learned PE
        pe = self.PE_Transformer(src=pe, src_key_padding_mask=pad_mask)

        # Masked positions become NaN so that nansum ignores them
        pe[pad_mask.transpose(0, 1)] = float('nan')

        # Sum pooling over the eigenvector axis
        pe = torch.nansum(pe, 0, keepdim=False)

        # Concatenate learned PE to input embedding, then apply input dropout
        node_feat = self.in_feat_dropout(torch.cat((node_feat, pe), 1))

        # GNN
        for layer in self.layers:
            node_feat, edge_feat = layer(g, node_feat, edge_feat)
        g.ndata['h'] = node_feat

        # "sum" and "max" select their pooling; anything else means mean pooling
        if self.readout == "sum":
            pool = dgl.sum_nodes
        elif self.readout == "max":
            pool = dgl.max_nodes
        else:
            pool = dgl.mean_nodes

        return self.MLP_layer(pool(g, 'h'))

    def loss(self, scores, targets):
        """Mean absolute error (L1) between `scores` and `targets`."""
        return F.l1_loss(scores, targets)


================================================
FILE: nets/ZINC_graph_regression/load_net.py
================================================

from nets.ZINC_graph_regression.SAN_NodeLPE import SAN_NodeLPE
from nets.ZINC_graph_regression.SAN_EdgeLPE import SAN_EdgeLPE
from nets.ZINC_graph_regression.SAN import SAN


def NodeLPE(net_params):
    """Instantiate SAN_NodeLPE with `net_params` and return it."""
    network = SAN_NodeLPE(net_params)
    return network

def EdgeLPE(net_params):
    """Instantiate SAN_EdgeLPE with `net_params` and return it."""
    network = SAN_EdgeLPE(net_params)
    return network

def NoLPE(net_params):
    """Instantiate the plain SAN model with `net_params` and return it."""
    network = SAN(net_params)
    return network

def gnn_model(LPE, net_params):
    """
        Build the network selected by `LPE`.

        `LPE` must be one of 'edge', 'node' or 'none'; any other key raises
        KeyError. `net_params` is forwarded unchanged to the chosen builder.
    """
    builders = dict(edge=EdgeLPE, node=NodeLPE, none=NoLPE)
    build = builders[LPE]
    return build(net_params)

================================================
FILE: nets/molhiv_graph_regression/SAN.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import dgl

from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder

"""
    Graph Transformer with edge features
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN(nn.Module):
    """
        Graph Transformer binary classifier without a learned positional
        encoding.

        Atom and bond features are embedded with the OGB AtomEncoder /
        BondEncoder, refined by a stack of GraphTransformerLayer blocks,
        pooled per graph and mapped through an MLPReadout followed by a
        sigmoid.
    """

    def __init__(self, net_params):
        """
            Build the model from the `net_params` dictionary.

            Keys read: full_graph, gamma, GT_layers, GT_hidden_dim,
            GT_out_dim, GT_n_heads, residual, readout, in_feat_dropout,
            dropout, layer_norm, batch_norm, device.
        """
        super().__init__()

        use_full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        depth = net_params['GT_layers']
        hidden_dim = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        n_heads = net_params['GT_n_heads']

        feat_drop = net_params['in_feat_dropout']
        layer_drop = net_params['dropout']

        # Options kept on the instance
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        self.in_feat_dropout = nn.Dropout(feat_drop)

        self.embedding_h = AtomEncoder(emb_dim=hidden_dim)
        self.embedding_e = BondEncoder(emb_dim=hidden_dim)

        # The first depth-1 layers receive GT_hidden_dim twice; the last one
        # receives GT_out_dim as its second size argument.
        widths = [hidden_dim] * (depth - 1) + [out_dim]
        self.layers = nn.ModuleList([
            GraphTransformerLayer(gamma, hidden_dim, width, n_heads, use_full_graph, layer_drop,
                                  self.layer_norm, self.batch_norm, self.residual)
            for width in widths
        ])
        self.MLP_layer = MLPReadout(out_dim, 1)   # out dim for probability

    def forward(self, g, h, e):
        """
            Run the model on graph `g` with node features `h` and edge features `e`.

            Stores the final node features in g.ndata['h'] and returns the
            sigmoid of the MLP readout of the pooled graph representation.
        """
        # input embedding
        node_feat = self.in_feat_dropout(self.embedding_h(h))
        edge_feat = self.embedding_e(e)

        # Second Transformer
        for layer in self.layers:
            node_feat, edge_feat = layer(g, node_feat, edge_feat)
        g.ndata['h'] = node_feat

        # "sum" and "max" select their pooling; anything else means mean pooling
        if self.readout == "sum":
            pool = dgl.sum_nodes
        elif self.readout == "max":
            pool = dgl.max_nodes
        else:
            pool = dgl.mean_nodes

        return torch.sigmoid(self.MLP_layer(pool(g, 'h')))

    def loss(self, scores, targets):
        """Binary cross-entropy between probabilities `scores` and `targets` (both cast to float)."""
        return F.binary_cross_entropy(scores.float(), targets.float())


================================================
FILE: nets/molhiv_graph_regression/SAN_EdgeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import dgl

from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder

"""
    Graph Transformer with edge features
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN_EdgeLPE(nn.Module):
    """
        Graph Transformer binary classifier with an edge-level learned
        positional encoding (LPE).

        Per-edge (diff, product, eigenvalue) triples are projected, processed
        by a TransformerEncoder, sum-pooled over the eigenvector axis and
        concatenated to the bond embedding before the GraphTransformerLayer
        stack. The pooled graph representation goes through an MLPReadout
        and a sigmoid.
    """

    def __init__(self, net_params):
        """
            Build the model from the `net_params` dictionary.

            Keys read: full_graph, gamma, LPE_layers, LPE_dim, LPE_n_heads,
            GT_layers, GT_hidden_dim, GT_out_dim, GT_n_heads, residual,
            readout, in_feat_dropout, dropout, layer_norm, batch_norm, device.
        """
        super().__init__()

        use_full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        pe_depth = net_params['LPE_layers']
        pe_dim = net_params['LPE_dim']
        pe_heads = net_params['LPE_n_heads']

        depth = net_params['GT_layers']
        hidden_dim = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        n_heads = net_params['GT_n_heads']

        feat_drop = net_params['in_feat_dropout']
        layer_drop = net_params['dropout']

        # Options kept on the instance
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        self.in_feat_dropout = nn.Dropout(feat_drop)

        self.embedding_h = AtomEncoder(emb_dim=hidden_dim)
        # Bond embedding is narrower by LPE_dim so that the concatenated
        # laplace encoding brings edge features back to GT_hidden_dim.
        self.embedding_e = BondEncoder(emb_dim=hidden_dim - pe_dim)
        self.linear_A = nn.Linear(3, pe_dim)

        pe_block = nn.TransformerEncoderLayer(d_model=pe_dim, nhead=pe_heads)
        self.PE_Transformer = nn.TransformerEncoder(pe_block, num_layers=pe_depth)

        # The first depth-1 layers receive GT_hidden_dim twice; the last one
        # receives GT_out_dim as its second size argument.
        widths = [hidden_dim] * (depth - 1) + [out_dim]
        self.layers = nn.ModuleList([
            GraphTransformerLayer(gamma, hidden_dim, width, n_heads, use_full_graph, layer_drop,
                                  self.layer_norm, self.batch_norm, self.residual)
            for width in widths
        ])
        self.MLP_layer = MLPReadout(out_dim, 1)   # out dim for probability

    def forward(self, g, h, e, diff, product, EigVals):
        """
            Run the model on graph `g`.

            `h` / `e` are node / edge features for the encoders; `diff`,
            `product` and `EigVals` are concatenated along dim 2 to form the
            raw edge positional encoding, in which NaN marks empty entries.
            Stores the final node features in g.ndata['h'] and returns the
            sigmoid of the MLP readout of the pooled graph representation.
        """
        # input embedding
        node_feat = self.in_feat_dropout(self.embedding_h(h))
        edge_feat = self.embedding_e(e)

        pe = torch.cat((diff, product, EigVals), 2)  # (Num edges) x (Num Eigenvectors) x 3
        nan_mask = torch.isnan(pe)                   # same shape as pe
        pe[nan_mask] = 0
        pad_mask = nan_mask[:, :, 0]                 # (Num edges) x (Num Eigenvectors)

        pe = self.linear_A(pe.transpose(0, 1).float())  # (Num Eigenvectors) x (Num edges) x PE_dim

        # 1st Transformer: Learned PE
        pe = self.PE_Transformer(src=pe, src_key_padding_mask=pad_mask)

        # Masked positions become NaN so that nansum ignores them
        pe[pad_mask.transpose(0, 1)] = float('nan')

        # Sum pooling over the eigenvector axis
        pe = torch.nansum(pe, 0, keepdim=False)      # (Num edges) x PE_dim

        # Concatenate learned PE to input embedding
        edge_feat = torch.cat((edge_feat, pe), 1)

        # GNN
        for layer in self.layers:
            node_feat, edge_feat = layer(g, node_feat, edge_feat)
        g.ndata['h'] = node_feat

        # "sum" and "max" select their pooling; anything else means mean pooling
        if self.readout == "sum":
            pool = dgl.sum_nodes
        elif self.readout == "max":
            pool = dgl.max_nodes
        else:
            pool = dgl.mean_nodes

        return torch.sigmoid(self.MLP_layer(pool(g, 'h')))

    def loss(self, scores, targets):
        """Binary cross-entropy between probabilities `scores` and `targets` (both cast to float)."""
        return F.binary_cross_entropy(scores.float(), targets.float())


================================================
FILE: nets/molhiv_graph_regression/SAN_NodeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import dgl

from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder

"""
    Graph Transformer with edge features
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN_NodeLPE(nn.Module):
    """
        Graph Transformer binary classifier with a node-level learned
        positional encoding (LPE).

        Per-node (eigenvector entry, eigenvalue) pairs are projected,
        processed by a TransformerEncoder, sum-pooled over the eigenvector
        axis and concatenated to the atom embedding before the
        GraphTransformerLayer stack. The pooled graph representation goes
        through an MLPReadout and a sigmoid.
    """

    def __init__(self, net_params):
        """
            Build the model from the `net_params` dictionary.

            Keys read: full_graph, gamma, LPE_layers, LPE_dim, LPE_n_heads,
            GT_layers, GT_hidden_dim, GT_out_dim, GT_n_heads, residual,
            readout, in_feat_dropout, dropout, layer_norm, batch_norm, device.
        """
        super().__init__()

        use_full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        pe_depth = net_params['LPE_layers']
        pe_dim = net_params['LPE_dim']
        pe_heads = net_params['LPE_n_heads']

        depth = net_params['GT_layers']
        hidden_dim = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        n_heads = net_params['GT_n_heads']

        feat_drop = net_params['in_feat_dropout']
        layer_drop = net_params['dropout']

        # Options kept on the instance
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        self.in_feat_dropout = nn.Dropout(feat_drop)

        # Atom embedding is narrower by LPE_dim to make room for concatenating LPE
        self.embedding_h = AtomEncoder(emb_dim=hidden_dim - pe_dim)
        self.embedding_e_real = BondEncoder(emb_dim=hidden_dim)
        # NOTE: embedding_e_fake is registered here but not used by forward()
        self.embedding_e_fake = nn.Embedding(1, hidden_dim)
        self.linear_A = nn.Linear(2, pe_dim)

        pe_block = nn.TransformerEncoderLayer(d_model=pe_dim, nhead=pe_heads)
        self.PE_Transformer = nn.TransformerEncoder(pe_block, num_layers=pe_depth)

        # The first depth-1 layers receive GT_hidden_dim twice; the last one
        # receives GT_out_dim as its second size argument.
        widths = [hidden_dim] * (depth - 1) + [out_dim]
        self.layers = nn.ModuleList([
            GraphTransformerLayer(gamma, hidden_dim, width, n_heads, use_full_graph, layer_drop,
                                  self.layer_norm, self.batch_norm, self.residual)
            for width in widths
        ])
        self.MLP_layer = MLPReadout(out_dim, 1)   # 1 out dim for probability

    def forward(self, g, h, e, EigVecs, EigVals):
        """
            Run the model on graph `g`.

            `h` / `e` are node / edge features for the encoders; `EigVecs`
            (unsqueezed on dim 2) and `EigVals` are concatenated to form the
            raw node positional encoding, in which NaN marks empty entries.
            Stores the final node features in g.ndata['h'] and returns the
            sigmoid of the MLP readout of the pooled graph representation.
        """
        # input embedding
        node_feat = self.embedding_h(h)
        edge_feat = self.embedding_e_real(e)

        pe = torch.cat((EigVecs.unsqueeze(2), EigVals), dim=2).float()  # (Num nodes) x (Num Eigenvectors) x 2
        nan_mask = torch.isnan(pe)                                      # same shape as pe
        pe[nan_mask] = 0
        pad_mask = nan_mask[:, :, 0]                                    # (Num nodes) x (Num Eigenvectors)

        pe = self.linear_A(pe.transpose(0, 1))  # (Num Eigenvectors) x (Num nodes) x PE_dim

        # 1st Transformer: Learned PE
        pe = self.PE_Transformer(src=pe, src_key_padding_mask=pad_mask)

        # Masked positions become NaN so that nansum ignores them
        pe[pad_mask.transpose(0, 1)] = float('nan')

        # Sum pooling over the eigenvector axis
        pe = torch.nansum(pe, 0, keepdim=False)

        # Concatenate learned PE to input embedding, then apply input dropout
        node_feat = self.in_feat_dropout(torch.cat((node_feat, pe), 1))

        # Second Transformer
        for layer in self.layers:
            node_feat, edge_feat = layer(g, node_feat, edge_feat)
        g.ndata['h'] = node_feat

        # "sum" and "max" select their pooling; anything else means mean pooling
        if self.readout == "sum":
            pool = dgl.sum_nodes
        elif self.readout == "max":
            pool = dgl.max_nodes
        else:
            pool = dgl.mean_nodes

        return torch.sigmoid(self.MLP_layer(pool(g, 'h')))

    def loss(self, scores, targets):
        """Binary cross-entropy between probabilities `scores` and `targets` (both cast to float)."""
        return F.binary_cross_entropy(scores.float(), targets.float())


================================================
FILE: nets/molhiv_graph_regression/load_net.py
================================================

from nets.molhiv_graph_regression.SAN_NodeLPE import SAN_NodeLPE
from nets.molhiv_graph_regression.SAN_EdgeLPE import SAN_EdgeLPE
from nets.molhiv_graph_regression.SAN import SAN

def NodeLPE(net_params):
    """Instantiate SAN_NodeLPE with `net_params` and return it."""
    network = SAN_NodeLPE(net_params)
    return network

def EdgeLPE(net_params):
    """Instantiate SAN_EdgeLPE with `net_params` and return it."""
    network = SAN_EdgeLPE(net_params)
    return network

def NoLPE(net_params):
    """Instantiate the plain SAN model with `net_params` and return it."""
    network = SAN(net_params)
    return network

def gnn_model(LPE, net_params):
    """
        Build the network selected by `LPE`.

        `LPE` must be one of 'edge', 'node' or 'none'; any other key raises
        KeyError. `net_params` is forwarded unchanged to the chosen builder.
    """
    builders = dict(edge=EdgeLPE, node=NodeLPE, none=NoLPE)
    build = builders[LPE]
    return build(net_params)


================================================
FILE: nets/molpcba/SAN.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import dgl

from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder

"""
    Graph Transformer with edge features
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN(nn.Module):
    """
        Graph Transformer multi-output classifier without a learned
        positional encoding.

        Atom and bond features are embedded with the OGB AtomEncoder /
        BondEncoder, refined by a stack of GraphTransformerLayer blocks,
        pooled per graph and mapped through an MLPReadout (constructed with
        128 as its second argument) followed by a sigmoid.
    """

    def __init__(self, net_params):
        """
            Build the model from the `net_params` dictionary.

            Keys read: full_graph, gamma, GT_layers, GT_hidden_dim,
            GT_out_dim, GT_n_heads, residual, readout, in_feat_dropout,
            dropout, layer_norm, batch_norm, device.
        """
        super().__init__()

        use_full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        depth = net_params['GT_layers']
        hidden_dim = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        n_heads = net_params['GT_n_heads']

        feat_drop = net_params['in_feat_dropout']
        layer_drop = net_params['dropout']

        # Options kept on the instance
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        self.in_feat_dropout = nn.Dropout(feat_drop)

        self.embedding_h = AtomEncoder(emb_dim=hidden_dim)
        self.embedding_e = BondEncoder(emb_dim=hidden_dim)

        # The first depth-1 layers receive GT_hidden_dim twice; the last one
        # receives GT_out_dim as its second size argument.
        widths = [hidden_dim] * (depth - 1) + [out_dim]
        self.layers = nn.ModuleList([
            GraphTransformerLayer(gamma, hidden_dim, width, n_heads, use_full_graph, layer_drop,
                                  self.layer_norm, self.batch_norm, self.residual)
            for width in widths
        ])
        self.MLP_layer = MLPReadout(out_dim, 128)   # out dim for probability

    def forward(self, g, h, e):
        """
            Run the model on graph `g` with node features `h` and edge features `e`.

            Stores the final node features in g.ndata['h'] and returns the
            sigmoid of the MLP readout of the pooled graph representation.
        """
        # input embedding
        node_feat = self.in_feat_dropout(self.embedding_h(h))
        edge_feat = self.embedding_e(e)

        # Second Transformer
        for layer in self.layers:
            node_feat, edge_feat = layer(g, node_feat, edge_feat)
        g.ndata['h'] = node_feat

        # "sum" and "max" select their pooling; anything else means mean pooling
        if self.readout == "sum":
            pool = dgl.sum_nodes
        elif self.readout == "max":
            pool = dgl.max_nodes
        else:
            pool = dgl.mean_nodes

        return torch.sigmoid(self.MLP_layer(pool(g, 'h')))

    def loss(self, scores, targets):
        """Binary cross-entropy between probabilities `scores` and `targets` (both cast to float)."""
        return F.binary_cross_entropy(scores.float(), targets.float())


================================================
FILE: nets/molpcba/SAN_EdgeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import dgl

from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder

"""
    Graph Transformer with edge features
    
"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN_EdgeLPE(nn.Module):
    """
        Graph Transformer multi-output classifier with an edge-level learned
        positional encoding (LPE).

        Per-edge (diff, product, eigenvalue) triples are projected, processed
        by a TransformerEncoder, sum-pooled over the eigenvector axis and
        concatenated to the bond embedding before the GraphTransformerLayer
        stack. The pooled graph representation goes through an MLPReadout
        (constructed with 128 as its second argument) and a sigmoid.
    """

    def __init__(self, net_params):
        """
            Build the model from the `net_params` dictionary.

            Keys read: full_graph, gamma, LPE_layers, LPE_dim, LPE_n_heads,
            GT_layers, GT_hidden_dim, GT_out_dim, GT_n_heads, residual,
            readout, in_feat_dropout, dropout, layer_norm, batch_norm, device.
        """
        super().__init__()

        use_full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        pe_depth = net_params['LPE_layers']
        pe_dim = net_params['LPE_dim']
        pe_heads = net_params['LPE_n_heads']

        depth = net_params['GT_layers']
        hidden_dim = net_params['GT_hidden_dim']
        out_dim = net_params['GT_out_dim']
        n_heads = net_params['GT_n_heads']

        feat_drop = net_params['in_feat_dropout']
        layer_drop = net_params['dropout']

        # Options kept on the instance
        self.residual = net_params['residual']
        self.readout = net_params['readout']
        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']
        self.device = net_params['device']

        self.in_feat_dropout = nn.Dropout(feat_drop)

        self.embedding_h = AtomEncoder(emb_dim=hidden_dim)
        # Bond embedding is narrower by LPE_dim so that the concatenated
        # laplace encoding brings edge features back to GT_hidden_dim.
        self.embedding_e = BondEncoder(emb_dim=hidden_dim - pe_dim)
        self.linear_A = nn.Linear(3, pe_dim)

        pe_block = nn.TransformerEncoderLayer(d_model=pe_dim, nhead=pe_heads)
        self.PE_Transformer = nn.TransformerEncoder(pe_block, num_layers=pe_depth)

        # The first depth-1 layers receive GT_hidden_dim twice; the last one
        # receives GT_out_dim as its second size argument.
        widths = [hidden_dim] * (depth - 1) + [out_dim]
        self.layers = nn.ModuleList([
            GraphTransformerLayer(gamma, hidden_dim, width, n_heads, use_full_graph, layer_drop,
                                  self.layer_norm, self.batch_norm, self.residual)
            for width in widths
        ])
        self.MLP_layer = MLPReadout(out_dim, 128)   # out dim for probability

    def forward(self, g, h, e, diff, product, EigVals):
        """
            Run the model on graph `g`.

            `h` / `e` are node / edge features for the encoders; `diff`,
            `product` and `EigVals` are concatenated along dim 2 to form the
            raw edge positional encoding, in which NaN marks empty entries.
            Stores the final node features in g.ndata['h'] and returns the
            sigmoid of the MLP readout of the pooled graph representation.
        """
        # input embedding
        node_feat = self.in_feat_dropout(self.embedding_h(h))
        edge_feat = self.embedding_e(e)

        pe = torch.cat((diff, product, EigVals), 2)  # (Num edges) x (Num Eigenvectors) x 3
        nan_mask = torch.isnan(pe)                   # same shape as pe
        pe[nan_mask] = 0
        pad_mask = nan_mask[:, :, 0]                 # (Num edges) x (Num Eigenvectors)

        pe = self.linear_A(pe.transpose(0, 1).float())  # (Num Eigenvectors) x (Num edges) x PE_dim

        # 1st Transformer: Learned PE
        pe = self.PE_Transformer(src=pe, src_key_padding_mask=pad_mask)

        # Masked positions become NaN so that nansum ignores them
        pe[pad_mask.transpose(0, 1)] = float('nan')

        # Sum pooling over the eigenvector axis
        pe = torch.nansum(pe, 0, keepdim=False)      # (Num edges) x PE_dim

        # Concatenate learned PE to input embedding
        edge_feat = torch.cat((edge_feat, pe), 1)

        # GNN
        for layer in self.layers:
            node_feat, edge_feat = layer(g, node_feat, edge_feat)
        g.ndata['h'] = node_feat

        # "sum" and "max" select their pooling; anything else means mean pooling
        if self.readout == "sum":
            pool = dgl.sum_nodes
        elif self.readout == "max":
            pool = dgl.max_nodes
        else:
            pool = dgl.mean_nodes

        return torch.sigmoid(self.MLP_layer(pool(g, 'h')))

    def loss(self, scores, targets):
        """Binary cross-entropy between probabilities `scores` and `targets` (both cast to float)."""
        return F.binary_cross_entropy(scores.float(), targets.float())


================================================
FILE: nets/molpcba/SAN_NodeLPE.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import dgl

from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder

"""
    Graph Transformer with edge features

"""
from layers.graph_transformer_layer import GraphTransformerLayer
from layers.mlp_readout_layer import MLPReadout

class SAN_NodeLPE(nn.Module):
    """Graph transformer with edge features and a learned node positional encoding (LPE).

    Per-node (eigenvector, eigenvalue) pairs are embedded, run through a small
    Transformer encoder, sum-pooled over the eigenvector axis and concatenated
    to the atom embedding before the graph transformer layers. The graph
    readout is passed through an MLP followed by a sigmoid.
    """

    def __init__(self, net_params):
        """Build all sub-modules from the ``net_params`` dictionary.

        Keys read: ``full_graph``, ``gamma``, ``LPE_layers``, ``LPE_dim``,
        ``LPE_n_heads``, ``GT_layers``, ``GT_hidden_dim``, ``GT_out_dim``,
        ``GT_n_heads``, ``residual``, ``readout``, ``in_feat_dropout``,
        ``dropout``, ``layer_norm``, ``batch_norm``, ``device`` and
        ``extra_mlp``. A missing key raises ``KeyError``.
        """
        super().__init__()

        full_graph = net_params['full_graph']
        gamma = net_params['gamma']

        LPE_layers = net_params['LPE_layers']
        LPE_dim = net_params['LPE_dim']
        LPE_n_heads = net_params['LPE_n_heads']

        GT_layers = net_params['GT_layers']
        GT_hidden_dim = net_params['GT_hidden_dim']
        GT_out_dim = net_params['GT_out_dim']
        GT_n_heads = net_params['GT_n_heads']

        self.residual = net_params['residual']
        self.readout = net_params['readout']
        in_feat_dropout = net_params['in_feat_dropout']
        dropout = net_params['dropout']

        self.layer_norm = net_params['layer_norm']
        self.batch_norm = net_params['batch_norm']

        self.device = net_params['device']
        self.in_feat_dropout = nn.Dropout(in_feat_dropout)

        # The atom embedding is LPE_dim narrower than the hidden size so that
        # concatenating the learned PE brings node features to GT_hidden_dim.
        self.embedding_h = AtomEncoder(emb_dim = GT_hidden_dim-LPE_dim)
        self.embedding_e_real = BondEncoder(emb_dim = GT_hidden_dim)

        # Optional extra BatchNorm -> ReLU -> Linear stage on the raw embeddings
        self.extra_mlp = net_params['extra_mlp']

        if self.extra_mlp:
            self.norm_node = nn.BatchNorm1d(GT_hidden_dim-LPE_dim)
            self.norm_edge = nn.BatchNorm1d(GT_hidden_dim)
            self.relu = nn.ReLU()
            self.linear_init_node = nn.Linear(GT_hidden_dim-LPE_dim, GT_hidden_dim-LPE_dim)
            self.linear_init_edge = nn.Linear(GT_hidden_dim, GT_hidden_dim)

        # Lifts each (eigenvector entry, eigenvalue) pair to LPE_dim
        self.linear_A = nn.Linear(2, LPE_dim)

        encoder_layer = nn.TransformerEncoderLayer(d_model=LPE_dim, nhead=LPE_n_heads)
        self.PE_Transformer = nn.TransformerEncoder(encoder_layer, num_layers=LPE_layers)

        self.layers = nn.ModuleList([ GraphTransformerLayer(gamma, GT_hidden_dim, GT_hidden_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual) for _ in range(GT_layers-1) ])

        self.layers.append(GraphTransformerLayer(gamma, GT_hidden_dim, GT_out_dim, GT_n_heads, full_graph, dropout, self.layer_norm, self.batch_norm, self.residual))
        # 128 is passed as the second MLPReadout argument (presumably the
        # output width, one per prediction task -- confirm against MLPReadout).
        self.MLP_layer = MLPReadout(GT_out_dim, 128)


    def forward(self, g, h, e, EigVecs, EigVals):
        """Compute sigmoid-activated graph-level predictions.

        Args:
            g: DGL graph (or batch of graphs); ``g.ndata['h']`` is overwritten
                with the final node features.
            h: node (atom) features fed to ``AtomEncoder``.
            e: edge (bond) features fed to ``BondEncoder``.
            EigVecs: (Num nodes) x (Num Eigenvectors); NaN entries mark padding.
            EigVals: (Num nodes) x (Num Eigenvectors) x 1; NaN entries mark padding.

        Returns:
            Sigmoid of the MLP readout applied to the pooled node features.
        """
        # input embedding
        h = self.embedding_h(h)
        e = self.embedding_e_real(e)

        if self.extra_mlp:
            h = self.norm_node(h)
            h = self.relu(h)
            h = self.linear_init_node(h)

            e = self.norm_edge(e)
            e = self.relu(e)
            e = self.linear_init_edge(e)

        EigVecs = EigVecs.to(dtype=h.dtype)
        EigVals = EigVals.to(dtype=h.dtype)
        PosEnc = torch.cat((EigVecs.unsqueeze(2), EigVals), dim=2) # (Num nodes) x (Num Eigenvectors) x 2
        empty_mask = torch.isnan(PosEnc) # (Num nodes) x (Num Eigenvectors) x 2

        # Zero the padding so the linear layer never sees NaN
        PosEnc[empty_mask] = 0 # (Num nodes) x (Num Eigenvectors) x 2
        PosEnc = torch.transpose(PosEnc, 0 ,1) # (Num Eigenvectors) x (Num nodes) x 2
        PosEnc = self.linear_A(PosEnc) # (Num Eigenvectors) x (Num nodes) x PE_dim

        # 1st Transformer: learned PE. Eigenvectors form the sequence axis and
        # nodes the batch axis, so the padding mask is (Num nodes) x (Num Eigenvectors).
        PosEnc = self.PE_Transformer(src=PosEnc, src_key_padding_mask=empty_mask[:,:,0])

        # Mark padded positions as NaN so that nansum ignores them
        PosEnc[torch.transpose(empty_mask, 0 ,1)[:,:,0]] = float('nan')

        # Sum pooling over the eigenvector axis -> (Num nodes) x PE_dim
        PosEnc = torch.nansum(PosEnc, 0, keepdim=False)

        # Concatenate learned PE to input embedding
        h = torch.cat((h, PosEnc), 1)

        h = self.in_feat_dropout(h)

        # Second Transformer
        for conv in self.layers:
            h, e = conv(g, h, e)
        g.ndata['h'] = h

        if self.readout == "sum":
            hg = dgl.sum_nodes(g, 'h')
        elif self.readout == "max":
            hg = dgl.max_nodes(g, 'h')
        else:
            # "mean" and any unrecognised value both use mean pooling
            hg = dgl.mean_nodes(g, 'h')

        return torch.sigmoid(self.MLP_layer(hg))

    def loss(self, scores, targets):
        """Return the mean binary cross-entropy between ``scores`` and ``targets``.

        ``targets`` is cast to the dtype of ``scores``; ``scores`` must already
        be probabilities (``forward`` applies the sigmoid).
        """
        return F.binary_cross_entropy(scores, targets.to(dtype=scores.dtype))


================================================
FILE: nets/molpcba/load_net.py
================================================

from nets.molpcba.SAN_NodeLPE import SAN_NodeLPE
from nets.molpcba.SAN_EdgeLPE import SAN_EdgeLPE
from nets.molpcba.SAN import SAN

def NodeLPE(net_params):
    """Instantiate ``SAN_NodeLPE`` (node-level learned positional encoding) from ``net_params``."""
    net = SAN_NodeLPE(net_params)
    return net

def EdgeLPE(net_params):
    """Instantiate ``SAN_EdgeLPE`` (edge-level learned positional encoding) from ``net_params``."""
    net = SAN_EdgeLPE(net_params)
    return net

def NoLPE(net_params):
    """Instantiate the plain ``SAN`` model (selected by the 'none' key) from ``net_params``."""
    net = SAN(net_params)
    return net

def gnn_model(LPE, net_params):
    """Build the network variant selected by ``LPE``.

    ``LPE`` must be one of 'edge', 'node' or 'none'; any other value raises
    ``KeyError``. ``net_params`` is forwarded unchanged to the chosen builder.
    """
    builders = {
        'edge': EdgeLPE,
        'node': NodeLPE,
        'none': NoLPE
    }
    build = builders[LPE]
    return build(net_params)


================================================
FILE: requirements.txt
================================================
absl-py==0.11.0
argon2-cffi==20.1.0
ase==3.20.1
astunparse==1.6.3
async-generator==1.10
attrs==20.3.0
backcall==0.2.0
bleach==3.2.1
cachetools==4.1.1
certifi==2020.11.8
cffi==1.14.4
chardet==3.0.4
cycler==0.10.0
dataclasses==0.8
decorator==4.4.2
defusedxml==0.6.0
dgl==0.5.3
dgl-cu102==0.5.3
entrypoints==0.3
fsspec==0.8.4
future==0.18.2
gast==0.3.3
google-auth==1.23.0
google-auth-oauthlib==0.4.2
google-pasta==0.2.0
googledrivedownloader==0.4
grpcio==1.33.2
h5py==2.10.0
idna==2.10
importlib-metadata==3.1.0
ipykernel==5.3.4
ipython==7.16.1
ipython-genutils==0.2.0
ipywidgets==7.6.3
isodate==0.6.0
jedi==0.17.2
Jinja2==2.11.2
joblib==0.17.0
json5==0.9.5
jsonschema==3.2.0
jupyter-client==6.1.7
jupyter-core==4.7.0
jupyterlab==2.2.9
jupyterlab-pygments==0.1.2
jupyterlab-server==1.2.0
jupyterlab-widgets==1.0.0
Keras==2.4.3
Keras-Preprocessing==1.1.2
kiwisolver==1.3.1
littleutils==0.2.2
llvmlite==0.34.0
Markdown==3.3.3
MarkupSafe==1.1.1
matplotlib==3.3.3
mistune==0.8.4
nbclient==0.5.1
nbconvert==6.0.7
nbformat==5.0.8
nest-asyncio==1.4.3
networkx==2.5
notebook==6.1.5
numba==0.51.2
numpy==1.18.5
oauthlib==3.1.0
ogb==1.3.0
opt-einsum==3.3.0
outdated==0.2.1
packaging==20.4
pandas==1.1.4
pandocfilters==1.4.3
parso==0.7.1
pexpect==4.8.0
pickleshare==0.7.5
Pillow==8.0.1
prometheus-client==0.9.0
prompt-toolkit==3.0.8
protobuf==3.14.0
ptyprocess==0.6.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.20
Pygments==2.7.2
pyparsing==2.4.7
pyrsistent==0.17.3
python-dateutil==2.8.1
pytorch-lightning==1.0.8
pytz==2020.4
PyYAML==5.3.1
pyzmq==20.0.0
rdflib==5.0.0
requests==2.25.0
requests-oauthlib==1.3.0
rsa==4.6
scikit-learn==0.23.2
scipy==1.5.4
seaborn==0.11.0
Send2Trash==1.5.0
six==1.15.0
tensorboard==2.4.0
tensorboard-plugin-wit==1.7.0
tensorboardX==2.1
tensorflow-estimator==2.3.0
tensorflow-gpu==2.3.1
termcolor==1.1.0
terminado==0.9.1
testpath==0.4.4
threadpoolctl==2.1.0
torch==1.7.0
torch-cluster==1.5.8
torch-geometric==1.6.1
torch-scatter==2.0.5
torch-sparse==0.6.8
torch-spline-conv==1.2.0
torchvision==0.8.1
tornado==6.1
tqdm==4.52.0
traitlets==4.3.3
typing-extensions==3.7.4.3
urllib3==1.26.2
wcwidth==0.2.5
webencodings==0.5.1
Werkzeug==1.0.1
widgetsnbextension==3.5.1
wrapt==1.12.1
zipp==3.4.0


================================================
FILE: scripts/CLUSTER/ablation/full_node_1e-1
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/full/1e-1/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/full/1e-1/node'
done


================================================
FILE: scripts/CLUSTER/ablation/full_node_1e-2
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/full/1e-2/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/full/1e-2/node'
done


================================================
FILE: scripts/CLUSTER/ablation/full_node_1e-3
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/full/1e-3/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/full/1e-3/node'
done


================================================
FILE: scripts/CLUSTER/ablation/full_node_1e-4
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/full/1e-4/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/full/1e-4/node'
done


================================================
FILE: scripts/CLUSTER/ablation/full_node_1e-5
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/full/1e-5/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/full/1e-5/node'
done


================================================
FILE: scripts/CLUSTER/ablation/full_node_1e-6
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/full/1e-6/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/full/1e-6/node'
done


================================================
FILE: scripts/CLUSTER/ablation/full_node_1e-7
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/full/1e-7/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/full/1e-7/node'
done


================================================
FILE: scripts/CLUSTER/ablation/full_node_1e-8
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/full/1e-8/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/full/1e-8/node'
done


================================================
FILE: scripts/CLUSTER/ablation/full_none
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/full/none.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/full/none'
done


================================================
FILE: scripts/CLUSTER/ablation/sparse_node
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/sparse/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/sparse/node'
done


================================================
FILE: scripts/CLUSTER/ablation/sparse_none
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/ablation/sparse/none.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/ablation/sparse/none'
done


================================================
FILE: scripts/CLUSTER/optimized/cluster_optimized
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_CLUSTER once per seed with
# config configs/CLUSTER/optimized.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_CLUSTER --seed "$seed" --config 'configs/CLUSTER/optimized'
done


================================================
FILE: scripts/MOLHIV/ablation/full_node_1e-3
================================================
#!/bin/bash
# Run main_molhiv.py four times with config configs/MOLHIV/ablation/full/1e-3/node.
# No --seed is passed; how the runs differ is decided by main_molhiv.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-3/node'
done


================================================
FILE: scripts/MOLHIV/ablation/full_node_1e-4
================================================
#!/bin/bash
# Run main_molhiv.py four times with config configs/MOLHIV/ablation/full/1e-4/node.
# No --seed is passed; how the runs differ is decided by main_molhiv.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-4/node'
done


================================================
FILE: scripts/MOLHIV/ablation/full_node_1e-5
================================================
#!/bin/bash
# Run main_molhiv.py four times with config configs/MOLHIV/ablation/full/1e-5/node.
# No --seed is passed; how the runs differ is decided by main_molhiv.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-5/node'
done


================================================
FILE: scripts/MOLHIV/ablation/full_node_1e-6
================================================
#!/bin/bash
# Run main_molhiv.py four times with config configs/MOLHIV/ablation/full/1e-6/node.
# No --seed is passed; how the runs differ is decided by main_molhiv.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-6/node'
done


================================================
FILE: scripts/MOLHIV/ablation/full_node_1e-7
================================================
#!/bin/bash
# Run main_molhiv.py four times with config configs/MOLHIV/ablation/full/1e-7/node.
# No --seed is passed; how the runs differ is decided by main_molhiv.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-7/node'
done


================================================
FILE: scripts/MOLHIV/ablation/full_node_1e-8
================================================
#!/bin/bash
# Run main_molhiv.py four times with config configs/MOLHIV/ablation/full/1e-8/node.
# No --seed is passed; how the runs differ is decided by main_molhiv.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molhiv.py --config 'configs/MOLHIV/ablation/full/1e-8/node'
done


================================================
FILE: scripts/MOLHIV/ablation/full_none
================================================
#!/bin/bash
# Run main_molhiv.py four times with config configs/MOLHIV/ablation/full/none.
# No --seed is passed; how the runs differ is decided by main_molhiv.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molhiv.py --config 'configs/MOLHIV/ablation/full/none'
done


================================================
FILE: scripts/MOLHIV/ablation/sparse_node
================================================
#!/bin/bash
# Run main_molhiv.py four times with config configs/MOLHIV/ablation/sparse/node.
# No --seed is passed; how the runs differ is decided by main_molhiv.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molhiv.py --config 'configs/MOLHIV/ablation/sparse/node'
done


================================================
FILE: scripts/MOLHIV/ablation/sparse_none
================================================
#!/bin/bash
# Run main_molhiv.py four times with config configs/MOLHIV/ablation/sparse/none.
# No --seed is passed; how the runs differ is decided by main_molhiv.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molhiv.py --config 'configs/MOLHIV/ablation/sparse/none'
done


================================================
FILE: scripts/MOLHIV/optimized/molhiv_optimized
================================================
#!/bin/bash
# Run main_molhiv.py four times with config configs/MOLHIV/optimized.
# No --seed is passed; how the runs differ is decided by main_molhiv.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molhiv.py --config 'configs/MOLHIV/optimized'
done


================================================
FILE: scripts/MOLPCBA/ablation/full_node_1e-3
================================================
#!/bin/bash
# Run main_molpcba.py four times with config configs/MOLPCBA/ablation/full/1e-3/node.
# No --seed is passed; how the runs differ is decided by main_molpcba.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molpcba.py --config 'configs/MOLPCBA/ablation/full/1e-3/node'
done


================================================
FILE: scripts/MOLPCBA/ablation/full_node_1e-4
================================================
#!/bin/bash
# Run main_molpcba.py four times with config configs/MOLPCBA/ablation/full/1e-4/node.
# No --seed is passed; how the runs differ is decided by main_molpcba.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molpcba.py --config 'configs/MOLPCBA/ablation/full/1e-4/node'
done


================================================
FILE: scripts/MOLPCBA/ablation/full_node_1e-5
================================================
#!/bin/bash
# Run main_molpcba.py four times with config configs/MOLPCBA/ablation/full/1e-5/node.
# No --seed is passed; how the runs differ is decided by main_molpcba.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molpcba.py --config 'configs/MOLPCBA/ablation/full/1e-5/node'
done


================================================
FILE: scripts/MOLPCBA/ablation/full_node_1e-6
================================================
#!/bin/bash
# Run main_molpcba.py four times with config configs/MOLPCBA/ablation/full/1e-6/node.
# No --seed is passed; how the runs differ is decided by main_molpcba.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molpcba.py --config 'configs/MOLPCBA/ablation/full/1e-6/node'
done


================================================
FILE: scripts/MOLPCBA/ablation/full_node_1e-7
================================================
#!/bin/bash
# Run main_molpcba.py four times with config configs/MOLPCBA/ablation/full/1e-7/node.
# No --seed is passed; how the runs differ is decided by main_molpcba.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molpcba.py --config 'configs/MOLPCBA/ablation/full/1e-7/node'
done


================================================
FILE: scripts/MOLPCBA/ablation/full_node_1e-8
================================================
#!/bin/bash
# Run main_molpcba.py four times with config configs/MOLPCBA/ablation/full/1e-8/node.
# No --seed is passed; how the runs differ is decided by main_molpcba.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molpcba.py --config 'configs/MOLPCBA/ablation/full/1e-8/node'
done


================================================
FILE: scripts/MOLPCBA/ablation/full_none
================================================
#!/bin/bash
# Run main_molpcba.py four times with config configs/MOLPCBA/ablation/full/none.
# No --seed is passed; how the runs differ is decided by main_molpcba.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molpcba.py --config 'configs/MOLPCBA/ablation/full/none'
done


================================================
FILE: scripts/MOLPCBA/ablation/sparse_node
================================================
#!/bin/bash
# Run main_molpcba.py four times with config configs/MOLPCBA/ablation/sparse/node.
# No --seed is passed; how the runs differ is decided by main_molpcba.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molpcba.py --config 'configs/MOLPCBA/ablation/sparse/node'
done


================================================
FILE: scripts/MOLPCBA/ablation/sparse_none
================================================
#!/bin/bash
# Run main_molpcba.py four times with config configs/MOLPCBA/ablation/sparse/none.
# No --seed is passed; how the runs differ is decided by main_molpcba.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molpcba.py --config 'configs/MOLPCBA/ablation/sparse/none'
done


================================================
FILE: scripts/MOLPCBA/optimized/molpcba_optimized
================================================
#!/bin/bash
# Run main_molpcba.py four times with config configs/MOLPCBA/optimized.
# No --seed is passed; how the runs differ is decided by main_molpcba.py / the config.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for run in 1 2 3 4; do
    python main_molpcba.py --config 'configs/MOLPCBA/optimized'
done


================================================
FILE: scripts/PATTERN/ablation/full_node_1e-1
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/full/1e-1/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/full/1e-1/node'
done


================================================
FILE: scripts/PATTERN/ablation/full_node_1e-2
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/full/1e-2/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/full/1e-2/node'
done


================================================
FILE: scripts/PATTERN/ablation/full_node_1e-3
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/full/1e-3/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/full/1e-3/node'
done


================================================
FILE: scripts/PATTERN/ablation/full_node_1e-4
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/full/1e-4/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/full/1e-4/node'
done


================================================
FILE: scripts/PATTERN/ablation/full_node_1e-5
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/full/1e-5/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/full/1e-5/node'
done


================================================
FILE: scripts/PATTERN/ablation/full_node_1e-6
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/full/1e-6/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/full/1e-6/node'
done


================================================
FILE: scripts/PATTERN/ablation/full_node_1e-7
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/full/1e-7/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/full/1e-7/node'
done


================================================
FILE: scripts/PATTERN/ablation/full_node_1e-8
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/full/1e-8/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/full/1e-8/node'
done


================================================
FILE: scripts/PATTERN/ablation/full_none
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/full/none.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/full/none'
done


================================================
FILE: scripts/PATTERN/ablation/sparse_node
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/sparse/node.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/sparse/node'
done


================================================
FILE: scripts/PATTERN/ablation/sparse_none
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/ablation/sparse/none.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/ablation/sparse/none'
done


================================================
FILE: scripts/PATTERN/optimized/pattern_optimized
================================================
#!/bin/bash
# Run main_SBMs_node_classification.py on SBM_PATTERN once per seed with
# config configs/PATTERN/optimized.

# Stop if the checkout is missing rather than launching from the wrong directory.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_SBMs_node_classification.py --dataset SBM_PATTERN --seed "$seed" --config 'configs/PATTERN/optimized'
done


================================================
FILE: scripts/ZINC/ablation/full_node_1e-2
================================================
#!/bin/bash
# Runs ZINC graph regression with the full/1e-2/node ablation config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/ablation/full/1e-2/node'
done


================================================
FILE: scripts/ZINC/ablation/full_node_1e-3
================================================
#!/bin/bash
# Runs ZINC graph regression with the full/1e-3/node ablation config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/ablation/full/1e-3/node'
done


================================================
FILE: scripts/ZINC/ablation/full_node_1e-4
================================================
#!/bin/bash
# Runs ZINC graph regression with the full/1e-4/node ablation config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/ablation/full/1e-4/node'
done


================================================
FILE: scripts/ZINC/ablation/full_node_1e-5
================================================
#!/bin/bash
# Runs ZINC graph regression with the full/1e-5/node ablation config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/ablation/full/1e-5/node'
done


================================================
FILE: scripts/ZINC/ablation/full_node_1e-6
================================================
#!/bin/bash
# Runs ZINC graph regression with the full/1e-6/node ablation config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/ablation/full/1e-6/node'
done


================================================
FILE: scripts/ZINC/ablation/full_node_1e-7
================================================
#!/bin/bash
# Runs ZINC graph regression with the full/1e-7/node ablation config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/ablation/full/1e-7/node'
done


================================================
FILE: scripts/ZINC/ablation/full_node_1e-8
================================================
#!/bin/bash
# Runs ZINC graph regression with the full/1e-8/node ablation config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/ablation/full/1e-8/node'
done


================================================
FILE: scripts/ZINC/ablation/full_none
================================================
#!/bin/bash
# Runs ZINC graph regression with the full/none ablation config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/ablation/full/none'
done


================================================
FILE: scripts/ZINC/ablation/sparse_node
================================================
#!/bin/bash
# Runs ZINC graph regression with the sparse/node ablation config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/ablation/sparse/node'
done


================================================
FILE: scripts/ZINC/ablation/sparse_none
================================================
#!/bin/bash
# Runs ZINC graph regression with the sparse/none ablation config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/ablation/sparse/none'
done


================================================
FILE: scripts/ZINC/optimized/zinc_optimized
================================================
#!/bin/bash
# Runs ZINC graph regression with the optimized config,
# once per seed, sequentially.

# Abort if the project directory is missing so no run starts from the wrong place.
cd ~/SAN/ || exit 1

for seed in 41 55 13 88; do
    python main_ZINC_graph_regression.py --seed "$seed" --config 'configs/ZINC/optimized'
done


================================================
FILE: scripts/reproduce.md
================================================
# Reproducibility


<br>


All outputs will be written to the ```/out``` directory at the root of the project, in the folders specified in the respective ```configs/``` files.


## 1. ZINC


```
# SOTA comparison results
bash ZINC/optimized/zinc_optimized

# Ablation results
bash ZINC/ablation/sparse_none
bash ZINC/ablation/sparse_node
bash ZINC/ablation/full_none
bash ZINC/ablation/full_node_1e-2
bash ZINC/ablation/full_node_1e-3
bash ZINC/ablation/full_node_1e-4
bash ZINC/ablation/full_node_1e-5
bash ZINC/ablation/full_node_1e-6
bash ZINC/ablation/full_node_1e-7
bash ZINC/ablation/full_node_1e-8
```
 <br>

## 2. PATTERN


```
# SOTA comparison results
bash PATTERN/optimized/pattern_optimized

# Ablation results
bash PATTERN/ablation/sparse_none
bash PATTERN/ablation/sparse_node
bash PATTERN/ablation/full_none
bash PATTERN/ablation/full_node_1e-1
bash PATTERN/ablation/full_node_1e-2
bash PATTERN/ablation/full_node_1e-3
bash PATTERN/ablation/full_node_1e-4
bash PATTERN/ablation/full_node_1e-5
bash PATTERN/ablation/full_node_1e-6
bash PATTERN/ablation/full_node_1e-7
bash PATTERN/ablation/full_node_1e-8
```
 <br>
 
 ## 3. CLUSTER


```
# SOTA comparison results
bash CLUSTER/optimized/cluster_optimized

# Ablation results
bash CLUSTER/ablation/sparse_none
bash CLUSTER/ablation/sparse_node
bash CLUSTER/ablation/full_none
bash CLUSTER/ablation/full_node_1e-1
bash CLUSTER/ablation/full_node_1e-2
bash CLUSTER/ablation/full_node_1e-3
bash CLUSTER/ablation/full_node_1e-4
bash CLUSTER/ablation/full_node_1e-5
bash CLUSTER/ablation/full_node_1e-6
bash CLUSTER/ablation/full_node_1e-7
bash CLUSTER/ablation/full_node_1e-8
```
 <br>
 
 ## 4. MolHIV


```
# SOTA comparison results
bash MOLHIV/optimized/molhiv_optimized

# Ablation results
bash MOLHIV/ablation/sparse_none
bash MOLHIV/ablation/sparse_node
bash MOLHIV/ablation/full_none
bash MOLHIV/ablation/full_node_1e-3
bash MOLHIV/ablation/full_node_1e-4
bash MOLHIV/ablation/full_node_1e-5
bash MOLHIV/ablation/full_node_1e-6
bash MOLHIV/ablation/full_node_1e-7
bash MOLHIV/ablation/full_node_1e-8
```


<br><br><br>


================================================
FILE: train/MetricWrapper.py
================================================
from typing import Union, Callable, Optional, Dict, Any
from copy import deepcopy
import torch
from torch import Tensor

class MetricWrapper:
    r"""
    Wrap a metric callable and apply a NaN-handling policy to the targets
    before the metric is evaluated.

    The policy is chosen with `target_nan_mask`; any extra keyword arguments
    given at construction are forwarded to the metric on every call.
    """

    def __init__(
        self,
        metric: Union[str, Callable],
        target_nan_mask: Optional[Union[str, int]] = None,
        **kwargs,
    ):
        r"""
        Parameters
            metric:
                The metric to use. It is called as
                `metric(preds, target, **kwargs)`, so it must be callable
                for `compute` to work.

            target_nan_mask:

                - None: Do not change behaviour if there are NaNs

                - int, float: Value used to replace NaNs. For example, if `target_nan_mask==0`, then
                  all NaNs will be replaced by zeros

                - 'ignore-flatten': The Tensor will be reduced to a vector without the NaN values.

                - 'ignore-mean-label': NaNs will be ignored when computing the loss. Note that each column
                  has a different number of NaNs, so the metric will be computed separately
                  on each column, and the metric result will be averaged over all columns.
                  *This option might slowdown the computation if there are too many labels*

            kwargs:
                Other arguments to call with the metric
        """

        self.metric = metric
        self.target_nan_mask = target_nan_mask
        self.kwargs = kwargs

    def compute(self, preds: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        r"""
        Compute the metric after applying the configured NaN policy to `target`.

        1-D inputs are first expanded with a trailing dimension.

        Raises
            ValueError: if `target_nan_mask` is not one of the supported options.
        """

        if preds.ndim == 1:
            preds = preds.unsqueeze(-1)

        if target.ndim == 1:
            target = target.unsqueeze(-1)

        target_nans = torch.isnan(target)

        if self.target_nan_mask is None:
            # NaNs are passed through to the metric untouched.
            pass
        elif isinstance(self.target_nan_mask, (int, float)):
            # Work on a copy so the caller's tensor is not modified in place.
            target = target.clone()
            target[target_nans] = self.target_nan_mask
        elif self.target_nan_mask == "ignore-flatten":
            target = target[~target_nans]
            preds = preds[~target_nans]
        elif self.target_nan_mask == "ignore-mean-label":
            return self._compute_mean_label(preds, target, target_nans)
        else:
            raise ValueError(f"Invalid option `{self.target_nan_mask}`")

        return self.metric(preds, target, **self.kwargs)

    def _compute_mean_label(self, preds: Tensor, target: Tensor, target_nans: Tensor) -> Tensor:
        r"""
        Evaluate the metric column by column on the non-NaN targets and return
        the NaN-ignoring mean of the per-column results.
        """
        target_list = [target[..., ii][~target_nans[..., ii]] for ii in range(target.shape[-1])]
        preds_list = [preds[..., ii][~target_nans[..., ii]] for ii in range(preds.shape[-1])]

        metric_val = []
        for col_preds, col_target in zip(preds_list, target_list):
            try:
                metric_val.append(self.metric(col_preds, col_target, **self.kwargs))
            except Exception:
                # Best effort: a column on which the metric fails is left out
                # of the average. Only `Exception` is caught so that
                # KeyboardInterrupt / SystemExit still propagate.
                continue

        if not metric_val:
            # No column could be evaluated: report NaN rather than crashing
            # in `torch.stack` on an empty list.
            return torch.tensor(float("nan"), device=target_nans.device)

        return self.nan_mean(torch.stack(metric_val))

    def __call__(self, preds: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        r"""
        Compute the metric with the method `self.compute`
        """
        return self.compute(preds, target)

    def __repr__(self):
        r"""
        Control how the class is printed: the metric's `__name__` when it has
        one, otherwise the metric string itself or the metric's `repr`.
        """
        name = getattr(self.metric, "__name__", None)
        if name is None:
            # e.g. functools.partial objects or callable instances
            name = self.metric if isinstance(self.metric, str) else repr(self.metric)

        return f"{name}"

    def nan_mean(self, input: Tensor, **kwargs) -> Tensor:
        r"""
        Mean of `input` that ignores NaN entries. `kwargs` are forwarded to
        both `torch.nansum` and `torch.sum` (e.g. `dim`).
        """
        total = torch.nansum(input, **kwargs)
        count = torch.sum(~torch.isnan(input), **kwargs)
        return total / count


================================================
FILE: train/metrics.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
import numpy as np


def MAE(scores, targets):
    """Return the mean absolute error between `scores` and `targets` as a Python number."""
    return F.l1_loss(scores, targets).detach().item()


def accuracy_TU(scores, targets):
    """Return the number (not the fraction) of rows whose argmax over dim 1 equals the target."""
    predicted = scores.detach().argmax(dim=1)
    return predicted.eq(targets).float().sum().item()


def accuracy_MNIST_CIFAR(scores, targets):
    """Return the count of correct argmax predictions in the batch (not normalised)."""
    hits = torch.argmax(scores.detach(), dim=1) == targets
    return hits.float().sum().item()

def accuracy_CITATION_GRAPH(scores, targets):
    """Return the fraction of rows whose argmax over dim 1 equals the target."""
    predicted = scores.detach().argmax(dim=1)
    n_correct = predicted.eq(targets).float().sum().item()
    return n_correct / len(targets)


def accuracy_SBM(scores, targets):
    """Return the class-averaged accuracy, in percent.

    For every row `r` of the confusion matrix, the diagonal entry is divided
    by the number of targets equal to `r`; rows with no such target contribute
    0. The result is the mean over all rows, times 100.

    NOTE(review): `confusion_matrix` is called without `labels=`, so its rows
    follow the sorted labels that actually occur in targets/predictions. The
    `targets == r` lookup below presumably relies on those labels being
    exactly 0..k-1 -- confirm for batches where a class is absent.
    """
    y_true = targets.cpu().detach().numpy()
    y_pred = np.argmax(torch.nn.Softmax(dim=1)(scores).cpu().detach().numpy(), axis=1)
    CM = confusion_matrix(y_true, y_pred).astype(np.float32)
    nb_classes = CM.shape[0]
    # Per-class recall; entries stay 0.0 for classes without any target.
    pr_classes = np.zeros(nb_classes)
    for r in range(nb_classes):
        cluster_size = np.where(y_true == r)[0].shape[0]
        if cluster_size != 0:
            pr_classes[r] = CM[r, r] / float(cluster_size)
    acc = 100. * np.sum(pr_classes) / float(nb_classes)
    return acc


def binary_f1_score(scores, targets):
    """Compute the binary F1 score with scikit-learn.

    Predictions are the argmax of `scores` over dim 1; `average='binary'`
    reports the score of the positive class, i.e. label '1'.
    """
    predicted = scores.argmax(dim=1).cpu().numpy()
    actual = targets.cpu().numpy()
    return f1_score(actual, predicted, average='binary')

  
def accuracy_VOC(scores, targets):
    """Return the support-weighted F1 score of the argmax predictions.

    Predictions are the argmax of `scores` over dim 1.
    """
    y_pred = scores.detach().argmax(dim=1).cpu().numpy()
    y_true = targets.cpu().detach().numpy()
    # f1_score expects (y_true, y_pred); with average='weighted' the per-class
    # weights come from the true labels, so the argument order matters.
    acc = f1_score(y_true, y_pred, average='weighted')
    return acc


================================================
FILE: train/train_SBMs_node_classification.py
================================================
"""
    Utility function for training one epoch 
    and evaluating one epoch
"""
import torch
import torch.nn as nn
import math
import dgl

from train.metrics import accuracy_SBM as accuracy

def train_epoch(model, optimizer, device, data_loader, epoch, LPE):
    """
        Train `model` for one pass over `data_loader`.

        LPE selects the extra inputs given to the model: 'node' passes
        EigVecs/EigVals from ndata, 'edge' passes diff/product/EigVals from
        edata, anything else passes only node and edge features.
        `epoch` is not used in the body.

        Returns (mean batch loss, mean batch accuracy, optimizer).
    """
    model.train()
    epoch_loss = 0
    epoch_train_acc = 0

    for batch_idx, (batch_graphs, batch_labels) in enumerate(data_loader):

        batch_graphs = batch_graphs.to(device)
        batch_x = batch_graphs.ndata['feat'].to(device)
        batch_e = batch_graphs.edata['feat'].flatten().long().to(device)

        batch_labels = batch_labels.to(device)
        optimizer.zero_grad()

        if LPE == 'node':
            batch_EigVecs = batch_graphs.ndata['EigVecs'].to(device)
            # Random sign flipping: one +/-1 factor per column of EigVecs.
            sign_flip = torch.rand(batch_EigVecs.size(1)).to(device)
            sign_flip[sign_flip>=0.5] = 1.0; sign_flip[sign_flip<0.5] = -1.0
            # The flip was previously computed but never applied.
            # assumes EigVecs is (num_nodes, num_eigvecs) -- TODO confirm
            batch_EigVecs = batch_EigVecs * sign_flip.unsqueeze(0)

            batch_EigVals = batch_graphs.ndata['EigVals'].to(device)
            batch_scores = model.forward(batch_graphs, batch_x, batch_e, batch_EigVecs, batch_EigVals)

        elif LPE == 'edge':
            batch_diff = batch_graphs.edata['diff'].to(device)
            batch_prod = batch_graphs.edata['product'].to(device)
            batch_EigVals = batch_graphs.edata['EigVals'].to(device)
            batch_scores = model.forward(batch_graphs, batch_x, batch_e, batch_diff, batch_prod, batch_EigVals)

        else:
            batch_scores = model.forward(batch_graphs, batch_x, batch_e)

        loss = model.loss(batch_scores, batch_labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.detach().item()
        epoch_train_acc += accuracy(batch_scores, batch_labels)
    # Average the per-batch values over the number of batches seen.
    epoch_loss /= (batch_idx + 1)
    epoch_train_acc /= (batch_idx + 1)

    return epoch_loss, epoch_train_acc, optimizer


def evaluate_network(model, device, data_loader, epoch, LPE):
    """
        Evaluate `model` on every batch of `data_loader` with gradients
        disabled and return (mean batch loss, mean batch accuracy).

        LPE picks the extra model inputs ('node', 'edge' or none);
        `epoch` is not used in the body.
    """
    model.eval()
    total_loss = 0
    total_acc = 0

    with torch.no_grad():
        for step, (graphs, labels) in enumerate(data_loader):
            graphs = graphs.to(device)
            node_feat = graphs.ndata['feat'].to(device)
            edge_feat = graphs.edata['feat'].flatten().long().to(device)
            labels = labels.to(device)

            # Gather the positional-encoding inputs required by this LPE mode.
            if LPE == 'node':
                extra_inputs = (
                    graphs.ndata['EigVecs'].to(device),
                    graphs.ndata['EigVals'].to(device),
                )
            elif LPE == 'edge':
                extra_inputs = (
                    graphs.edata['diff'].to(device),
                    graphs.edata['product'].to(device),
                    graphs.edata['EigVals'].to(device),
                )
            else:
                extra_inputs = ()

            scores = model.forward(graphs, node_feat, edge_feat, *extra_inputs)

            total_loss += model.loss(scores, labels).detach().item()
            total_acc += accuracy(scores, labels)

        n_batches = step + 1
        total_loss /= n_batches
        total_acc /= n_batches

    return total_loss, total_acc


================================================
FILE: train/train_ZINC_graph_regression.py
================================================
"""
    Utility function for training one epoch 
    and evaluating one epoch
"""
import torch
import torch.nn as nn
import math

from train.metrics import MAE

def train_epoch(model, optimizer, device, data_loader, epoch, LPE):
    """
        Train `model` for one pass over `data_loader`.

        LPE selects the extra inputs given to the model: 'node' passes
        EigVecs/EigVals from ndata, 'edge' passes diff/product/EigVals from
        edata, anything else passes only node and edge features.
        `epoch` is not used in the body.

        Returns (mean batch loss, mean batch MAE, optimizer).
    """
    model.train()
    epoch_loss = 0
    epoch_train_mae = 0

    for batch_idx, (batch_graphs, batch_targets) in enumerate(data_loader):

        batch_graphs = batch_graphs.to(device)
        batch_x = batch_graphs.ndata['feat'].to(device)
        batch_e = batch_graphs.edata['feat'].to(device)

        batch_targets = batch_targets.to(device)
        optimizer.zero_grad()

        if LPE == 'node':
            batch_EigVecs = batch_graphs.ndata['EigVecs'].to(device)
            # Random sign flipping: one +/-1 factor per column of EigVecs.
            sign_flip = torch.rand(batch_EigVecs.size(1)).to(device)
            sign_flip[sign_flip>=0.5] = 1.0; sign_flip[sign_flip<0.5] = -1.0
            # The flip was previously computed but never applied.
            # assumes EigVecs is (num_nodes, num_eigvecs) -- TODO confirm
            batch_EigVecs = batch_EigVecs * sign_flip.unsqueeze(0)

            batch_EigVals = batch_graphs.ndata['EigVals'].to(device)
            batch_scores = model.forward(batch_graphs, batch_x, batch_e, batch_EigVecs, batch_EigVals)

        elif LPE == 'edge':
            batch_diff = batch_graphs.edata['diff'].to(device)
            batch_prod = batch_graphs.edata['product'].to(device)
            batch_EigVals = batch_graphs.edata['EigVals'].to(device)
            batch_scores = model.forward(batch_graphs, batch_x, batch_e, batch_diff, batch_prod, batch_EigVals)

        else:
            batch_scores = model.forward(batch_graphs, batch_x, batch_e)

        loss = model.loss(batch_scores, batch_targets)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.detach().item()
        epoch_train_mae += MAE(batch_scores, batch_targets)
    # Average the per-batch values over the number of batches seen.
    epoch_loss /= (batch_idx + 1)
    epoch_train_mae /= (batch_idx + 1)

    return epoch_loss, epoch_train_mae, optimizer

def evaluate_network(model, device, data_loader, epoch, LPE):
    """
        Evaluate `model` on every batch of `data_loader` with gradients
        disabled and return (mean batch loss, mean batch MAE).

        LPE picks the extra model inputs ('node', 'edge' or none);
        `epoch` is not used in the body.
    """
    model.eval()
    total_loss = 0
    total_mae = 0

    with torch.no_grad():
        for step, (graphs, targets) in enumerate(data_loader):
            graphs = graphs.to(device)
            node_feat = graphs.ndata['feat'].to(device)
            edge_feat = graphs.edata['feat'].to(device)
            targets = targets.to(device)

            # Gather the positional-encoding inputs required by this LPE mode.
            if LPE == 'node':
                extra_inputs = (
                    graphs.ndata['EigVecs'].to(device),
                    graphs.ndata['EigVals'].to(device),
                )
            elif LPE == 'edge':
                extra_inputs = (
                    graphs.edata['diff'].to(device),
                    graphs.edata['product'].to(device),
                    graphs.edata['EigVals'].to(device),
                )
            else:
                extra_inputs = ()

            scores = model.forward(graphs, node_feat, edge_feat, *extra_inputs)

            total_loss += model.loss(scores, targets).detach().item()
            total_mae += MAE(scores, targets)

        n_batches = step + 1
        total_loss /= n_batches
        total_mae /= n_batches

    return total_loss, total_mae


================================================
FILE: train/train_molhiv.py
================================================
"""
    Utility functions for training one epoch 
    and evaluating one epoch
"""
import torch
import torch.nn as nn
import math

from ogb.graphproppred import Evaluator

def train_epoch(model, optimizer, device, data_loader, epoch, LPE):
    """
        Train `model` for one pass over `data_loader`.

        LPE selects the extra inputs given to the model: 'node' passes
        randomly sign-flipped EigVecs plus EigVals from ndata, 'edge' passes
        diff/product/EigVals from edata, anything else passes only node and
        edge features. `epoch` is not used in the body.

        Returns (mean batch loss, 'rocauc' from the ogbg-molhiv evaluator
        over all batches of the epoch, optimizer).
    """
    model.train()
    evaluator = Evaluator(name = "ogbg-molhiv")

    epoch_loss = 0

    targets=torch.tensor([]).to(device)
    scores=torch.tensor([]).to(device)

    for batch_idx, (batch_graphs, batch_targets) in enumerate(data_loader):

        batch_graphs = batch_graphs.to(device)
        batch_x = batch_graphs.ndata['feat'].to(device)  # num x feat
        batch_e = batch_graphs.edata['feat'].to(device)

        batch_targets = batch_targets.to(device)
        optimizer.zero_grad()

        if LPE == 'node':

            batch_EigVecs = batch_graphs.ndata['EigVecs'].to(device)
            # Random sign flipping: one +/-1 factor per column of EigVecs.
            sign_flip = torch.rand(batch_EigVecs.size(1)).to(device)
            sign_flip[sign_flip>=0.5] = 1.0; sign_flip[sign_flip<0.5] = -1.0
            batch_EigVecs = batch_EigVecs * sign_flip.unsqueeze(0)

            batch_EigVals = batch_graphs.ndata['EigVals'].to(device)
            batch_scores = model.forward(batch_graphs, batch_x, batch_e, batch_EigVecs, batch_EigVals)

        elif LPE == 'edge':

            batch_diff = batch_graphs.edata['diff'].to(device)
            batch_prod = batch_graphs.edata['product'].to(device)
            batch_EigVals = batch_graphs.edata['EigVals'].to(device)
            batch_scores = model.forward(batch_graphs, batch_x, batch_e, batch_diff, batch_prod, batch_EigVals)

        else:
            batch_scores = model.forward(batch_graphs, batch_x, batch_e)

        targets = torch.cat((targets, batch_targets), 0)
        # Detach before accumulating: the epoch-long score tensor is only used
        # for the metric and must not stay attached to every batch's autograd graph.
        scores = torch.cat((scores, batch_scores.detach()), 0)

        loss = model.loss(batch_scores, batch_targets)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.detach().item()

    input_dict = {"y_true": targets, "y_pred": scores}
    epoch_train_auc = evaluator.eval(input_dict)['rocauc']

    # Average the per-batch loss over the number of batches seen.
    epoch_loss /= (batch_idx + 1)

    return epoch_loss, epoch_train_auc, optimizer

def evaluate_network(model, device, data_loader, epoch, LPE):
    """
        Evaluate `model` on every batch of `data_loader` with gradients
        disabled. Returns (mean batch loss, 'rocauc' from the ogbg-molhiv
        evaluator over all batches).

        LPE picks the extra model inputs ('node', 'edge' or none);
        `epoch` is not used in the body.
    """
    model.eval()
    evaluator = Evaluator(name = "ogbg-molhiv")

    total_loss = 0

    # Running concatenation of every batch's targets and scores.
    all_targets = torch.tensor([]).to(device)
    all_scores = torch.tensor([]).to(device)

    with torch.no_grad():
        for step, (graphs, batch_targets) in enumerate(data_loader):
            graphs = graphs.to(device)
            node_feat = graphs.ndata['feat'].to(device)  # num x feat
            edge_feat = graphs.edata['feat'].to(device)
            batch_targets = batch_targets.to(device)

            # Gather the positional-encoding inputs required by this LPE mode.
            if LPE == 'node':
                extra_inputs = (
                    graphs.ndata['EigVecs'].to(device),
                    graphs.ndata['EigVals'].to(device),
                )
            elif LPE == 'edge':
                extra_inputs = (
                    graphs.edata['diff'].to(device),
                    graphs.edata['product'].to(device),
                    graphs.edata['EigVals'].to(device),
                )
            else:
                extra_inputs = ()

            batch_scores = model.forward(graphs, node_feat, edge_feat, *extra_inputs)

            all_targets = torch.cat((all_targets, batch_targets), 0)
            all_scores = torch.cat((all_scores, batch_scores), 0)

            total_loss += model.loss(batch_scores, batch_targets).detach().item()

    auc = evaluator.eval({"y_true": all_targets, "y_pred": all_scores})['rocauc']

    total_loss /= (step + 1)

    return total_loss, auc


================================================
FILE: train/train_molpcba.py
================================================
"""
    Utility functions for training one epoch
    and evaluating one epoch
"""
import torch
from torch._C import dtype
import torch.nn as nn
import math

from ogb.graphproppred import Evaluator
from train.MetricWrapper import MetricWrapper


def train_epoch(model, optimizer, device, data_loader, epoch, LPE, batch_accumulation):
    """
        Train `model` for one pass over `data_loader` with gradient
        accumulation: the optimizer steps every `batch_accumulation` batches
        and on the last batch of the loader.

        The loss is `model.loss` wrapped so that NaN targets are dropped
        ('ignore-flatten'). LPE picks the extra model inputs ('node', 'edge'
        or none); `epoch` is not used in the body.

        Returns (mean batch loss, 'ap' from the ogbg-molpcba evaluator over
        all batches of the epoch, optimizer).
    """
    model.train()
    evaluator = Evaluator(name = "ogbg-molpcba")

    epoch_loss = 0

    # Targets and scores are accumulated on the CPU for the metric.
    targets=torch.tensor([])
    scores=torch.tensor([])

    wrapped_loss_fun = MetricWrapper(metric=model.loss, target_nan_mask="ignore-flatten")

    for batch_idx, (batch_graphs, batch_targets) in enumerate(data_loader):
        batch_graphs = batch_graphs.to(device=device)
        batch_x = batch_graphs.ndata['feat']
        batch_e = batch_graphs.edata['feat']

        batch_targets = batch_targets.to(device)

        if LPE == 'node':

            batch_EigVecs = batch_graphs.ndata['EigVecs']
            # Random sign flipping: one +/-1 factor per column of EigVecs.
            sign_flip = torch.rand(batch_EigVecs.size(1), device=device)
            sign_flip[sign_flip>=0.5] = 1.0; sign_flip[sign_flip<0.5] = -1.0
            batch_EigVecs = batch_EigVecs * sign_flip.unsqueeze(0)

            batch_EigVals = batch_graphs.ndata['EigVals']
            batch_scores = model.forward(batch_graphs, batch_x, batch_e, batch_EigVecs, batch_EigVals)

        elif LPE == 'edge':

            batch_diff = batch_graphs.edata['diff']
            batch_prod = batch_graphs.edata['product']
            batch_EigVals = batch_graphs.edata['EigVals']
            batch_scores = model.forward(batch_graphs, batch_x, batch_e, batch_diff, batch_prod, batch_EigVals)

        else:
            batch_scores = model.forward(batch_graphs, batch_x, batch_e)

        loss = wrapped_loss_fun(batch_scores, batch_targets)
        # Record the unscaled loss so the reported value is comparable with
        # the evaluation loss; only the gradient is scaled for accumulation.
        epoch_loss += loss.detach().item()
        (loss / batch_accumulation).backward()

        # weights update
        if ((batch_idx + 1) % batch_accumulation == 0) or (batch_idx + 1 == len(data_loader)):
            optimizer.step()
            optimizer.zero_grad()

        targets = torch.cat((targets, batch_targets.detach().cpu()), 0)
        scores = torch.cat((scores, batch_scores.detach().cpu()), 0)

    input_dict = {"y_true": targets, "y_pred": scores}
    epoch_train_ap = evaluator.eval(input_dict)['ap']

    # Average the per-batch loss over the number of batches seen.
    epoch_loss /= (batch_idx + 1)

    return epoch_loss, epoch_train_ap, optimizer

def evaluate_network(model, device, data_loader, epoch, LPE):
    """
        Evaluate `model` on every batch of `data_loader` with gradients
        disabled. Returns (mean batch loss, 'ap' from the ogbg-molpcba
        evaluator over all batches).

        The loss is `model.loss` wrapped so that NaN targets are dropped
        ('ignore-flatten'). LPE picks the extra model inputs ('node', 'edge'
        or none); `epoch` is not used in the body.
    """
    model.eval()
    evaluator = Evaluator(name = "ogbg-molpcba")

    total_loss = 0

    # Running CPU-side concatenation of every batch's targets and scores.
    all_targets = torch.tensor([])
    all_scores = torch.tensor([])

    nan_safe_loss = MetricWrapper(metric=model.loss, target_nan_mask="ignore-flatten")

    with torch.no_grad():
        for step, (graphs, batch_targets) in enumerate(data_loader):
            graphs = graphs.to(device=device)
            node_feat = graphs.ndata['feat']
            edge_feat = graphs.edata['feat']
            batch_targets = batch_targets.to(device=device)

            # Gather the positional-encoding inputs required by this LPE mode.
            if LPE == 'node':
                extra_inputs = (graphs.ndata['EigVecs'], graphs.ndata['EigVals'])
            elif LPE == 'edge':
                extra_inputs = (
                    graphs.edata['diff'],
                    graphs.edata['product'],
                    graphs.edata['EigVals'],
                )
            else:
                extra_inputs = ()

            batch_scores = model.forward(graphs, node_feat, edge_feat, *extra_inputs)

            total_loss += nan_safe_loss(batch_scores, batch_targets).detach().item()

            all_targets = torch.cat((all_targets, batch_targets.detach().cpu()), 0)
            all_scores = torch.cat((all_scores, batch_scores.detach().cpu()), 0)

    ap = evaluator.eval({"y_true": all_targets, "y_pred": all_scores})['ap']

    total_loss /= (step + 1)

    return total_loss, ap